// polkavm_linker/program_from_elf.rs

1use polkavm_common::abi::{MemoryMapBuilder, VM_CODE_ADDRESS_ALIGNMENT, VM_MAX_PAGE_SIZE, VM_MIN_PAGE_SIZE};
2use polkavm_common::cast::cast;
3use polkavm_common::program::{
4    self, FrameKind, Instruction, InstructionFormat, InstructionSet, InstructionSetKind, LineProgramOp, Opcode, ProgramBlob,
5    ProgramCounter, ProgramSymbol,
6};
7use polkavm_common::utils::{align_to_next_page_u32, align_to_next_page_u64};
8use polkavm_common::varint;
9use polkavm_common::writer::{ProgramBlobBuilder, Writer};
10
11use core::ops::Range;
12use std::borrow::Cow;
13use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
14use std::sync::Arc;
15
16use crate::dwarf::Location;
17use crate::elf::{Elf, Section, SectionIndex};
18use crate::fast_range_map::RangeMap;
19use crate::riscv::DecoderConfig;
20use crate::riscv::Reg as RReg;
21use crate::riscv::{AtomicKind, BranchKind, CmovKind, Inst, LoadKind, RegImmKind, StoreKind};
22
/// The register set used internally by the linker.
///
/// The first 13 variants mirror the registers the VM actually supports; the
/// discriminants are their numeric indexes (see `from_usize`/`to_usize`).
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
#[repr(u8)]
enum Reg {
    // The registers supported by the VM.
    RA = 0,
    SP = 1,
    T0 = 2,
    T1 = 3,
    T2 = 4,
    S0 = 5,
    S1 = 6,
    A0 = 7,
    A1 = 8,
    A2 = 9,
    A3 = 10,
    A4 = 11,
    A5 = 12,

    // Extra fake registers. These will be stripped away before the final codegen pass.
    E0 = 13,
    E1 = 14,
    E2 = 15,
    E3 = 16,
}
47
48impl Reg {
49    fn to_usize(self) -> usize {
50        self as usize
51    }
52}
53
// Lossless 1:1 mapping from the public VM register type to the linker's
// internal register type (which additionally has the fake `E*` registers).
impl From<polkavm_common::program::Reg> for Reg {
    fn from(reg: polkavm_common::program::Reg) -> Reg {
        use polkavm_common::program::Reg as R;
        match reg {
            R::RA => Reg::RA,
            R::SP => Reg::SP,
            R::T0 => Reg::T0,
            R::T1 => Reg::T1,
            R::T2 => Reg::T2,
            R::S0 => Reg::S0,
            R::S1 => Reg::S1,
            R::A0 => Reg::A0,
            R::A1 => Reg::A1,
            R::A2 => Reg::A2,
            R::A3 => Reg::A3,
            R::A4 => Reg::A4,
            R::A5 => Reg::A5,
        }
    }
}
74
75impl From<polkavm_common::program::RawReg> for Reg {
76    fn from(reg: polkavm_common::program::RawReg) -> Reg {
77        reg.get().into()
78    }
79}
80
81impl From<polkavm_common::program::RawReg> for RegImm {
82    fn from(reg: polkavm_common::program::RawReg) -> RegImm {
83        RegImm::Reg(reg.get().into())
84    }
85}
86
impl Reg {
    /// Inverse of the discriminant cast: maps an index in `0..=16` back to a register.
    /// Returns `None` for anything out of range.
    pub const fn from_usize(value: usize) -> Option<Reg> {
        match value {
            0 => Some(Reg::RA),
            1 => Some(Reg::SP),
            2 => Some(Reg::T0),
            3 => Some(Reg::T1),
            4 => Some(Reg::T2),
            5 => Some(Reg::S0),
            6 => Some(Reg::S1),
            7 => Some(Reg::A0),
            8 => Some(Reg::A1),
            9 => Some(Reg::A2),
            10 => Some(Reg::A3),
            11 => Some(Reg::A4),
            12 => Some(Reg::A5),
            13 => Some(Reg::E0),
            14 => Some(Reg::E1),
            15 => Some(Reg::E2),
            16 => Some(Reg::E3),
            _ => None,
        }
    }

    /// The register's lowercase assembly-style name.
    pub const fn name(self) -> &'static str {
        use Reg::*;
        match self {
            RA => "ra",
            SP => "sp",
            T0 => "t0",
            T1 => "t1",
            T2 => "t2",
            S0 => "s0",
            S1 => "s1",
            A0 => "a0",
            A1 => "a1",
            A2 => "a2",
            A3 => "a3",
            A4 => "a4",
            A5 => "a5",

            E0 => "e0",
            E1 => "e1",
            E2 => "e2",
            E3 => "e3",
        }
    }

    /// For the fake (`E*`) registers returns their 0-based index; `None` for real registers.
    fn fake_register_index(self) -> Option<usize> {
        match self {
            Reg::E0 => Some(0),
            Reg::E1 => Some(1),
            Reg::E2 => Some(2),
            Reg::E3 => Some(3),
            _ => None,
        }
    }

    // Every register, real and fake, in discriminant order.
    const ALL: [Reg; 17] = {
        use Reg::*;
        [RA, SP, T0, T1, T2, S0, S1, A0, A1, A2, A3, A4, A5, E0, E1, E2, E3]
    };

    // Only the fake registers, which are stripped before the final codegen pass.
    const FAKE: [Reg; 4] = { [Reg::E0, Reg::E1, Reg::E2, Reg::E3] };
    // Call argument registers; the length is checked against the ABI constant below.
    const INPUT_REGS: [Reg; 9] = [Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::A4, Reg::A5, Reg::T0, Reg::T1, Reg::T2];
    // Call return-value registers; the length is checked against the ABI constant below.
    const OUTPUT_REGS: [Reg; 2] = [Reg::A0, Reg::A1];
}
154
// Compile-time sanity checks: the input/output register tables above must stay
// in sync with the ABI limits declared in `polkavm_common`.
polkavm_common::static_assert!(Reg::INPUT_REGS.len() == polkavm_common::program::Reg::MAXIMUM_INPUT_REGS);
polkavm_common::static_assert!(Reg::OUTPUT_REGS.len() == polkavm_common::program::Reg::MAXIMUM_OUTPUT_REGS);
157
/// The internal error categories produced while converting an ELF file into a PolkaVM program.
#[derive(Debug)]
pub enum ProgramFromElfErrorKind {
    FailedToParseElf(object::read::Error),
    FailedToParseDwarf(gimli::Error),
    FailedToParseProgram(program::ProgramParseError),
    UnsupportedSection(String),
    UnsupportedRegister { reg: RReg },

    /// Catch-all with a free-form message; constructed via `ProgramFromElfError::other`.
    Other(Cow<'static, str>),
}
168
169impl From<object::read::Error> for ProgramFromElfError {
170    fn from(error: object::read::Error) -> Self {
171        ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseElf(error))
172    }
173}
174
175impl From<gimli::Error> for ProgramFromElfError {
176    fn from(error: gimli::Error) -> Self {
177        ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseDwarf(error))
178    }
179}
180
181impl From<program::ProgramParseError> for ProgramFromElfError {
182    fn from(error: program::ProgramParseError) -> Self {
183        ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseProgram(error))
184    }
185}
186
/// The public error type of the linker; a newtype so the kind enum stays crate-internal.
#[derive(Debug)]
pub struct ProgramFromElfError(ProgramFromElfErrorKind);
189
190impl From<ProgramFromElfErrorKind> for ProgramFromElfError {
191    fn from(kind: ProgramFromElfErrorKind) -> Self {
192        Self(kind)
193    }
194}
195
196impl ProgramFromElfError {
197    pub(crate) fn other(error: impl Into<Cow<'static, str>>) -> Self {
198        Self(ProgramFromElfErrorKind::Other(error.into()))
199    }
200}
201
// Convenience conversion so callers can surface the error as a plain string,
// using the `Display` implementation below.
impl From<ProgramFromElfError> for String {
    fn from(error: ProgramFromElfError) -> String {
        error.to_string()
    }
}
207
// The default `Error` trait methods suffice; `Display`/`Debug` carry all the detail.
#[allow(clippy::std_instead_of_core)]
impl std::error::Error for ProgramFromElfError {}
210
211impl core::fmt::Display for ProgramFromElfError {
212    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
213        match &self.0 {
214            ProgramFromElfErrorKind::FailedToParseElf(error) => write!(fmt, "failed to parse ELF file: {}", error),
215            ProgramFromElfErrorKind::FailedToParseDwarf(error) => write!(fmt, "failed to parse DWARF: {}", error),
216            ProgramFromElfErrorKind::FailedToParseProgram(error) => write!(fmt, "{}", error),
217            ProgramFromElfErrorKind::UnsupportedSection(section) => write!(fmt, "unsupported section: {}", section),
218            ProgramFromElfErrorKind::UnsupportedRegister { reg } => write!(fmt, "unsupported register: {reg}"),
219            ProgramFromElfErrorKind::Other(message) => fmt.write_str(message),
220        }
221    }
222}
223
224fn cast_reg_non_zero(reg: RReg) -> Result<Option<Reg>, ProgramFromElfError> {
225    use RReg::*;
226    match reg {
227        Zero => Ok(None),
228        RA => Ok(Some(Reg::RA)),
229        SP => Ok(Some(Reg::SP)),
230        T0 => Ok(Some(Reg::T0)),
231        T1 => Ok(Some(Reg::T1)),
232        T2 => Ok(Some(Reg::T2)),
233        S0 => Ok(Some(Reg::S0)),
234        S1 => Ok(Some(Reg::S1)),
235        A0 => Ok(Some(Reg::A0)),
236        A1 => Ok(Some(Reg::A1)),
237        A2 => Ok(Some(Reg::A2)),
238        A3 => Ok(Some(Reg::A3)),
239        A4 => Ok(Some(Reg::A4)),
240        A5 => Ok(Some(Reg::A5)),
241        GP | TP | A6 | A7 | S2 | S3 | S4 | S5 | S6 | S7 | S8 | S9 | S10 | S11 | T3 | T4 | T5 | T6 => {
242            Err(ProgramFromElfErrorKind::UnsupportedRegister { reg }.into())
243        }
244    }
245}
246
247fn cast_reg_any(reg: RReg) -> Result<RegImm, ProgramFromElfError> {
248    Ok(cast_reg_non_zero(reg)?.map_or(RegImm::Imm(0), RegImm::Reg))
249}
250
/// A byte range inside a specific ELF section, identifying where some code came from.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub(crate) struct Source {
    pub(crate) section_index: SectionIndex,
    pub(crate) offset_range: AddressRange,
}
256
257impl core::fmt::Display for Source {
258    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
259        write!(
260            fmt,
261            "<{}+{}..{}>",
262            self.section_index, self.offset_range.start, self.offset_range.end
263        )
264    }
265}
266
267impl Source {
268    fn begin(&self) -> SectionTarget {
269        SectionTarget {
270            section_index: self.section_index,
271            offset: self.offset_range.start,
272        }
273    }
274
275    fn iter(&'_ self) -> impl Iterator<Item = SectionTarget> + '_ {
276        (self.offset_range.start..self.offset_range.end)
277            .step_by(2)
278            .map(|offset| SectionTarget {
279                section_index: self.section_index,
280                offset,
281            })
282    }
283}
284
/// A stack of `Source` ranges; multiple entries arise from inlining, with the
/// innermost source first (see `top`).
// TODO: Use smallvec.
#[derive(Clone, Debug)]
struct SourceStack(Vec<Source>);
288
289impl core::fmt::Display for SourceStack {
290    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
291        fmt.write_str("[")?;
292        let mut is_first = true;
293        for source in &self.0 {
294            if is_first {
295                is_first = false;
296            } else {
297                fmt.write_str(", ")?;
298            }
299            source.fmt(fmt)?;
300        }
301        fmt.write_str("]")
302    }
303}
304
305impl SourceStack {
306    fn as_slice(&self) -> &[Source] {
307        &self.0
308    }
309
310    fn top(&self) -> &Source {
311        &self.0[0]
312    }
313
314    fn overlay_on_top_of(&self, stack: &SourceStack) -> Self {
315        let mut vec = Vec::with_capacity(self.0.len() + stack.0.len());
316        vec.extend(self.0.iter().copied());
317        vec.extend(stack.0.iter().copied());
318
319        SourceStack(vec)
320    }
321
322    fn overlay_on_top_of_inplace(&mut self, stack: &SourceStack) {
323        self.0.extend(stack.0.iter().copied());
324    }
325
326    fn display(&self, section_to_function_name: &BTreeMap<SectionTarget, String>) -> String {
327        use core::fmt::Write;
328
329        let mut out = String::new();
330        out.push('[');
331        let mut is_first = true;
332        for source in &self.0 {
333            if is_first {
334                is_first = false;
335            } else {
336                out.push_str(", ");
337            }
338            write!(&mut out, "{}", source).unwrap();
339            if let Some((origin, name)) = section_to_function_name.range(..=source.begin()).next_back() {
340                if origin.section_index == source.section_index {
341                    write!(&mut out, " \"{name}\"+{}", source.offset_range.start - origin.offset).unwrap();
342                }
343            }
344        }
345        out.push(']');
346        out
347    }
348}
349
350impl From<Source> for SourceStack {
351    fn from(source: Source) -> Self {
352        SourceStack(vec![source])
353    }
354}
355
/// The terminating control-flow instruction of a basic block, together with
/// the source location(s) it originated from.
#[derive(Clone, Debug)]
struct EndOfBlock<T> {
    source: SourceStack,
    instruction: ControlInst<T>,
}
361
impl<T> EndOfBlock<T> {
    /// Rewrites the terminator's jump target(s) with `map`, keeping the source stack intact.
    /// Fails with the first error `map` returns.
    fn map_target<U, E>(self, map: impl Fn(T) -> Result<U, E>) -> Result<EndOfBlock<U>, E> {
        Ok(EndOfBlock {
            source: self.source,
            instruction: self.instruction.map_target(map)?,
        })
    }
}
370
/// A half-open `[start, end)` range of addresses or offsets.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct AddressRange {
    pub(crate) start: u64,
    pub(crate) end: u64,
}

impl AddressRange {
    /// A range is empty when it spans no bytes at all.
    pub(crate) fn is_empty(&self) -> bool {
        self.start == self.end
    }

    /// Checks whether the two half-open ranges intersect.
    pub(crate) const fn is_overlapping(&self, other: &AddressRange) -> bool {
        self.start < other.end && other.start < self.end
    }
}
386
387impl core::fmt::Display for AddressRange {
388    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
389        write!(fmt, "0x{:x}-0x{:x}", self.start, self.end)
390    }
391}
392
393impl core::fmt::Debug for AddressRange {
394    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
395        write!(fmt, "0x{:x}-0x{:x}", self.start, self.end)
396    }
397}
398
399impl From<Range<u64>> for AddressRange {
400    fn from(range: Range<u64>) -> Self {
401        AddressRange {
402            start: range.start,
403            end: range.end,
404        }
405    }
406}
407
/// A single location inside an ELF section: a section index plus a byte offset.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct SectionTarget {
    pub(crate) section_index: SectionIndex,
    pub(crate) offset: u64,
}
413
414impl core::fmt::Display for SectionTarget {
415    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
416        write!(fmt, "<{}+{}>", self.section_index, self.offset)
417    }
418}
419
420impl core::fmt::Debug for SectionTarget {
421    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
422        write!(fmt, "<{}+{}>", self.section_index, self.offset)
423    }
424}
425
426impl From<SectionTarget> for SectionIndex {
427    fn from(target: SectionTarget) -> Self {
428        target.section_index
429    }
430}
431
/// Finds the first `prefix`…`suffix` pair in `*str` and splits around it.
///
/// On success returns `(text_before_prefix, text_between_delimiters)` and
/// advances `*str` past the suffix. Returns `None` (leaving `*str` untouched)
/// when either delimiter is missing.
fn extract_delimited<'a>(str: &mut &'a str, prefix: &str, suffix: &str) -> Option<(&'a str, &'a str)> {
    let original = *str;
    let prefix_start = original.find(prefix)?;
    let inner_start = prefix_start + prefix.len();
    let inner_end = inner_start + original[inner_start..].find(suffix)?;
    *str = &original[inner_end + suffix.len()..];
    Some((&original[..prefix_start], &original[inner_start..inner_end]))
}
440
#[test]
fn test_extract_delimited() {
    // The cursor must advance past the suffix, returning the pieces around the match.
    let mut str = "foo <section #1234+567> bar";
    assert_eq!(extract_delimited(&mut str, "<section #", ">").unwrap(), ("foo ", "1234+567"));
    assert_eq!(str, " bar");
}
447
impl SectionTarget {
    /// Renders this target with section/symbol names resolved from `elf`.
    fn fmt_human_readable(&self, elf: &Elf) -> String {
        Self::make_human_readable_in_debug_string(elf, &self.to_string())
    }

    /// Rewrites every `<section #N+OFF>` occurrence in `str` into an annotated
    /// form containing the section name and, when found, the enclosing symbol.
    fn make_human_readable_in_debug_string(elf: &Elf, mut str: &str) -> String {
        // A hack-ish way to make nested `Debug` error messages more readable by replacing
        // raw section indexes and offsets with a more human readable string.

        let mut output = String::new();
        while let Some((prefix, chunk)) = extract_delimited(&mut str, "<section #", ">") {
            output.push_str(prefix);

            // Expect the chunk to look like "INDEX+OFFSET".
            let mut iter = chunk.split('+');
            if let Some(section_index) = iter.next().and_then(|s| s.parse::<usize>().ok()) {
                if let Some(offset) = iter.next().and_then(|s| s.parse::<u64>().ok()) {
                    if let Some(section) = elf.section_by_raw_index(section_index) {
                        use core::fmt::Write;

                        // Find a symbol in the same section whose [offset, offset + size) covers this offset.
                        let symbol = elf.symbols().find(|symbol| {
                            let Ok(target) = symbol.section_target() else {
                                return false;
                            };
                            section_index == target.section_index.raw()
                                && offset >= target.offset
                                && offset < (target.offset + symbol.size())
                        });

                        let section_name = section.name();
                        write!(&mut output, "<section #{section_index}+{offset} ('{section_name}'").unwrap();
                        if let Some(symbol) = symbol {
                            if let Some(symbol_name) = symbol.name() {
                                write!(
                                    &mut output,
                                    ": '{}'+{}",
                                    symbol_name,
                                    offset - symbol.section_target().unwrap().offset
                                )
                                .unwrap();
                            }
                        }
                        output.push_str(")>");
                        continue;
                    }
                }
            }
            // NOTE(review): on a parse failure only the bare chunk is emitted, so the
            // "<section #" / ">" delimiters are dropped from the output — confirm this
            // lossy fallback is intended for this debug-only helper.
            output.push_str(chunk);
        }

        output.push_str(str);
        output
    }

    /// Returns a copy of this target moved forward by `offset` bytes.
    /// (Uses plain `+`, so it panics on overflow in debug builds.)
    fn add(self, offset: u64) -> Self {
        SectionTarget {
            section_index: self.section_index,
            offset: self.offset + offset,
        }
    }

    /// Transforms the offset through `cb`, treating the raw bits as a signed value.
    fn map_offset_i64(self, cb: impl FnOnce(i64) -> i64) -> Self {
        let offset = self.offset as i64;
        SectionTarget {
            section_index: self.section_index,
            offset: cb(offset) as u64,
        }
    }
}
516
/// An index into the linker's list of basic blocks.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
#[repr(transparent)]
struct BlockTarget {
    block_index: usize,
}

impl BlockTarget {
    /// Wraps a raw basic-block index.
    fn from_raw(block_index: usize) -> Self {
        Self { block_index }
    }

    /// Unwraps back to the raw index.
    fn index(self) -> usize {
        self.block_index
    }
}
532
/// A reference to either a data location (section + offset) or a code location (basic block).
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
enum AnyTarget {
    Data(SectionTarget),
    Code(BlockTarget),
}
538
/// An operand that is either a register or a 32-bit immediate.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
enum RegImm {
    Reg(Reg),
    Imm(i32),
}
544
545impl RegImm {
546    fn map_register(self, mut map: impl FnMut(Reg) -> Reg) -> RegImm {
547        match self {
548            RegImm::Reg(reg) => RegImm::Reg(map(reg)),
549            RegImm::Imm(value) => RegImm::Imm(value),
550        }
551    }
552}
553
554impl From<Reg> for RegImm {
555    fn from(reg: Reg) -> Self {
556        RegImm::Reg(reg)
557    }
558}
559
560impl From<i32> for RegImm {
561    fn from(value: i32) -> Self {
562        RegImm::Imm(value)
563    }
564}
565
/// A non-control-flow instruction in the linker's intermediate representation.
///
/// `T` is the type of code/data targets; it changes as targets are resolved
/// (see `map_target`). Control transfers live in `ControlInst` instead.
#[derive(Clone, PartialEq, Eq, Debug)]
enum BasicInst<T> {
    LoadAbsolute {
        kind: LoadKind,
        dst: Reg,
        target: SectionTarget,
    },
    StoreAbsolute {
        kind: StoreKind,
        src: RegImm,
        target: SectionTarget,
    },
    LoadIndirect {
        kind: LoadKind,
        dst: Reg,
        base: Reg,
        offset: i32,
    },
    StoreIndirect {
        kind: StoreKind,
        src: RegImm,
        base: Reg,
        offset: i32,
    },
    LoadAddress {
        dst: Reg,
        target: T,
    },
    // This is supposed to load the address from the GOT, instead of loading it directly as an immediate.
    LoadAddressIndirect {
        dst: Reg,
        target: T,
    },
    LoadImmediate {
        dst: Reg,
        imm: i32,
    },
    LoadImmediate64 {
        dst: Reg,
        imm: i64,
    },
    MoveReg {
        dst: Reg,
        src: Reg,
    },
    Reg {
        kind: RegKind,
        dst: Reg,
        src: Reg,
    },
    RegReg {
        kind: RegRegKind,
        dst: Reg,
        src1: Reg,
        src2: Reg,
    },
    AnyAny {
        kind: AnyAnyKind,
        dst: Reg,
        src1: RegImm,
        src2: RegImm,
    },
    Cmov {
        kind: CmovKind,
        dst: Reg,
        src: RegImm,
        cond: Reg,
    },
    // A host call; the register contract comes from the import table (see `src_mask`/`dst_mask`).
    Ecalli {
        nth_import: usize,
    },
    Sbrk {
        dst: Reg,
        size: Reg,
    },
    // Implicitly operates on A0/A1/A2 (see `src_mask`/`dst_mask`/`map_register`).
    Memset,
    Nop,
    LoadHeapBase {
        dst: Reg,
    },
    // Function entry: adjusts SP and spills `regs` at the given stack offsets.
    Prologue {
        stack_space: u32,
        regs: Vec<(u32, Reg)>,
    },
    // Function exit: reloads `regs` and releases the stack space.
    Epilogue {
        stack_space: u32,
        regs: Vec<(u32, Reg)>,
    },
}
655
/// How an instruction accesses a register operand (used by `map_register`/`operands`).
#[derive(Copy, Clone)]
enum OpKind {
    Read,
    Write,
    ReadWrite,
}
662
663impl<T> BasicInst<T> {
664    fn is_nop(&self) -> bool {
665        match self {
666            BasicInst::MoveReg { dst, src } => dst == src,
667            BasicInst::Nop => true,
668            _ => false,
669        }
670    }
671
    /// The set of registers this instruction reads.
    ///
    /// `Ecalli` defers to the import's declared argument registers. Note that
    /// `Cmov` reads its `dst` too (it is a read-modify-write), and `Prologue`
    /// reads SP plus every register it spills.
    fn src_mask(&self, imports: &[Import]) -> RegMask {
        match *self {
            BasicInst::Nop
            | BasicInst::LoadHeapBase { .. }
            | BasicInst::LoadImmediate { .. }
            | BasicInst::LoadImmediate64 { .. }
            | BasicInst::LoadAbsolute { .. }
            | BasicInst::LoadAddress { .. }
            | BasicInst::LoadAddressIndirect { .. } => RegMask::empty(),
            BasicInst::MoveReg { src, .. } | BasicInst::Reg { src, .. } => RegMask::from(src),
            BasicInst::StoreAbsolute { src, .. } => RegMask::from(src),
            BasicInst::LoadIndirect { base, .. } => RegMask::from(base),
            BasicInst::StoreIndirect { src, base, .. } => RegMask::from(src) | RegMask::from(base),
            BasicInst::RegReg { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2),
            BasicInst::AnyAny { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2),
            BasicInst::Cmov { dst, src, cond, .. } => RegMask::from(dst) | RegMask::from(src) | RegMask::from(cond),
            BasicInst::Ecalli { nth_import } => imports[nth_import].src_mask(),
            BasicInst::Sbrk { size, .. } => RegMask::from(size),
            BasicInst::Memset => RegMask::from(Reg::A0) | RegMask::from(Reg::A1) | RegMask::from(Reg::A2),
            BasicInst::Prologue { ref regs, .. } => RegMask::from(Reg::SP) | RegMask::from_regs(regs.iter().map(|&(_, reg)| reg)),
            BasicInst::Epilogue { .. } => RegMask::from(Reg::SP),
        }
    }
695
    /// The set of registers this instruction writes.
    ///
    /// `Ecalli` defers to the import's declared return registers; `Epilogue`
    /// writes SP plus every register it reloads.
    fn dst_mask(&self, imports: &[Import]) -> RegMask {
        match *self {
            BasicInst::Nop | BasicInst::StoreAbsolute { .. } | BasicInst::StoreIndirect { .. } => RegMask::empty(),
            BasicInst::MoveReg { dst, .. }
            | BasicInst::LoadHeapBase { dst }
            | BasicInst::LoadImmediate { dst, .. }
            | BasicInst::LoadImmediate64 { dst, .. }
            | BasicInst::LoadAbsolute { dst, .. }
            | BasicInst::LoadAddress { dst, .. }
            | BasicInst::LoadAddressIndirect { dst, .. }
            | BasicInst::LoadIndirect { dst, .. }
            | BasicInst::RegReg { dst, .. }
            | BasicInst::Cmov { dst, .. }
            | BasicInst::Reg { dst, .. }
            | BasicInst::AnyAny { dst, .. } => RegMask::from(dst),
            BasicInst::Ecalli { nth_import } => imports[nth_import].dst_mask(),
            BasicInst::Sbrk { dst, .. } => RegMask::from(dst),
            BasicInst::Memset => RegMask::from(Reg::A0) | RegMask::from(Reg::A2),
            BasicInst::Prologue { .. } => RegMask::from(Reg::SP),
            BasicInst::Epilogue { ref regs, .. } => RegMask::from(Reg::SP) | RegMask::from_regs(regs.iter().map(|&(_, reg)| reg)),
        }
    }
718
    /// Whether the instruction must be kept even if its results are unused.
    ///
    /// Loads are treated as side-effecting unless the config opts into eliding
    /// unnecessary loads (a load from unmapped memory can trap).
    fn has_side_effects(&self, config: &Config) -> bool {
        match *self {
            BasicInst::Sbrk { .. }
            | BasicInst::Prologue { .. }
            | BasicInst::Ecalli { .. }
            | BasicInst::StoreAbsolute { .. }
            | BasicInst::StoreIndirect { .. }
            | BasicInst::Memset => true,
            BasicInst::LoadAbsolute { .. } | BasicInst::LoadIndirect { .. } => !config.elide_unnecessary_loads,
            BasicInst::Nop
            | BasicInst::Epilogue { .. }
            | BasicInst::LoadHeapBase { .. }
            | BasicInst::MoveReg { .. }
            | BasicInst::Reg { .. }
            | BasicInst::LoadImmediate { .. }
            | BasicInst::LoadImmediate64 { .. }
            | BasicInst::LoadAddress { .. }
            | BasicInst::LoadAddressIndirect { .. }
            | BasicInst::RegReg { .. }
            | BasicInst::Cmov { .. }
            | BasicInst::AnyAny { .. } => false,
        }
    }
742
    /// Rewrites every register operand through `map`, telling it whether each
    /// operand is read, written, or both.
    ///
    /// Returns `None` for `Ecalli`, whose register set comes from the import
    /// table and cannot be remapped here (see `operands`). The call order of
    /// `map` is significant: reads are always mapped before writes.
    fn map_register(self, mut map: impl FnMut(Reg, OpKind) -> Reg) -> Option<Self> {
        // Note: ALWAYS map the inputs first; otherwise `regalloc2` might break!
        match self {
            BasicInst::LoadImmediate { dst, imm } => Some(BasicInst::LoadImmediate {
                dst: map(dst, OpKind::Write),
                imm,
            }),
            BasicInst::LoadImmediate64 { dst, imm } => Some(BasicInst::LoadImmediate64 {
                dst: map(dst, OpKind::Write),
                imm,
            }),
            BasicInst::LoadAbsolute { kind, dst, target } => Some(BasicInst::LoadAbsolute {
                kind,
                dst: map(dst, OpKind::Write),
                target,
            }),
            BasicInst::StoreAbsolute { kind, src, target } => Some(BasicInst::StoreAbsolute {
                kind,
                src: src.map_register(|reg| map(reg, OpKind::Read)),
                target,
            }),
            BasicInst::LoadAddress { dst, target } => Some(BasicInst::LoadAddress {
                dst: map(dst, OpKind::Write),
                target,
            }),
            BasicInst::LoadAddressIndirect { dst, target } => Some(BasicInst::LoadAddressIndirect {
                dst: map(dst, OpKind::Write),
                target,
            }),
            BasicInst::LoadIndirect { kind, dst, base, offset } => Some(BasicInst::LoadIndirect {
                kind,
                base: map(base, OpKind::Read),
                dst: map(dst, OpKind::Write),
                offset,
            }),
            BasicInst::StoreIndirect { kind, src, base, offset } => Some(BasicInst::StoreIndirect {
                kind,
                src: src.map_register(|reg| map(reg, OpKind::Read)),
                base: map(base, OpKind::Read),
                offset,
            }),
            BasicInst::Reg { kind, dst, src } => Some(BasicInst::Reg {
                kind,
                src: map(src, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::RegReg { kind, dst, src1, src2 } => Some(BasicInst::RegReg {
                kind,
                src1: map(src1, OpKind::Read),
                src2: map(src2, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::AnyAny { kind, dst, src1, src2 } => Some(BasicInst::AnyAny {
                kind,
                src1: src1.map_register(|reg| map(reg, OpKind::Read)),
                src2: src2.map_register(|reg| map(reg, OpKind::Read)),
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::MoveReg { dst, src } => Some(BasicInst::MoveReg {
                src: map(src, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            // Cmov's destination is a read-modify-write operand: it keeps its old
            // value when the condition is not met.
            BasicInst::Cmov { kind, dst, src, cond } => Some(BasicInst::Cmov {
                kind,
                src: src.map_register(|reg| map(reg, OpKind::Read)),
                cond: map(cond, OpKind::Read),
                dst: map(dst, OpKind::ReadWrite),
            }),
            BasicInst::Ecalli { .. } => None,
            BasicInst::Sbrk { dst, size } => Some(BasicInst::Sbrk {
                size: map(size, OpKind::Read),
                dst: map(dst, OpKind::Write),
            }),
            // Memset's operands are fixed registers; assert that `map` keeps them pinned.
            BasicInst::Memset => {
                assert_eq!(map(Reg::A1, OpKind::Read), Reg::A1);
                assert_eq!(map(Reg::A0, OpKind::ReadWrite), Reg::A0);
                assert_eq!(map(Reg::A2, OpKind::ReadWrite), Reg::A2);
                Some(BasicInst::Memset)
            }
            BasicInst::LoadHeapBase { dst } => Some(BasicInst::LoadHeapBase {
                dst: map(dst, OpKind::Write),
            }),
            BasicInst::Nop => Some(BasicInst::Nop),
            // Prologue/Epilogue pin SP; the spilled/reloaded registers are remappable.
            BasicInst::Prologue { stack_space, regs } => {
                let output = BasicInst::Prologue {
                    stack_space,
                    regs: regs.into_iter().map(|(offset, reg)| (offset, map(reg, OpKind::Read))).collect(),
                };

                assert_eq!(map(Reg::SP, OpKind::ReadWrite), Reg::SP);
                Some(output)
            }
            BasicInst::Epilogue { stack_space, regs } => {
                assert_eq!(map(Reg::SP, OpKind::ReadWrite), Reg::SP);
                let output = BasicInst::Epilogue {
                    stack_space,
                    regs: regs.into_iter().map(|(offset, reg)| (offset, map(reg, OpKind::Write))).collect(),
                };

                Some(output)
            }
        }
    }
846
    /// Yields every register operand with its access kind, inputs before outputs.
    ///
    /// Implemented on top of `map_register` to avoid a second big match; `Ecalli`
    /// (the one case where `map_register` returns `None`) is expanded from the
    /// import's source/destination masks instead. The fixed-size buffer assumes
    /// no instruction has more than 8 operands.
    fn operands(&self, imports: &[Import]) -> impl Iterator<Item = (Reg, OpKind)>
    where
        T: Clone,
    {
        let mut list = [None, None, None, None, None, None, None, None];
        let mut length = 0;
        // Abuse the `map_register` to avoid matching on everything again.
        let is_special_instruction = self
            .clone()
            .map_register(|reg, kind| {
                list[length] = Some((reg, kind));
                length += 1;
                reg
            })
            .is_none();

        if is_special_instruction {
            assert_eq!(length, 0);

            let BasicInst::Ecalli { nth_import } = *self else { unreachable!() };
            let import = &imports[nth_import];

            for reg in import.src_mask() {
                list[length] = Some((reg, OpKind::Read));
                length += 1;
            }

            for reg in import.dst_mask() {
                list[length] = Some((reg, OpKind::Write));
                length += 1;
            }
        };

        let mut seen_dst = false;
        list.into_iter().take_while(|reg| reg.is_some()).flatten().map(move |(reg, kind)| {
            let is_dst = matches!(kind, OpKind::Write | OpKind::ReadWrite);

            // Sanity check to make sure inputs always come before outputs, so that `regalloc2` doesn't break.
            if seen_dst {
                assert!(is_dst);
            }
            seen_dst |= is_dst;

            (reg, kind)
        })
    }
893
    /// Converts the target type `T` to `U` via `map`, failing on the first error.
    ///
    /// Only `LoadAddress`/`LoadAddressIndirect` actually carry a `T`; every
    /// other variant is reconstructed unchanged at the new type.
    fn map_target<U, E>(self, map: impl Fn(T) -> Result<U, E>) -> Result<BasicInst<U>, E> {
        Ok(match self {
            BasicInst::MoveReg { dst, src } => BasicInst::MoveReg { dst, src },
            BasicInst::LoadImmediate { dst, imm } => BasicInst::LoadImmediate { dst, imm },
            BasicInst::LoadImmediate64 { dst, imm } => BasicInst::LoadImmediate64 { dst, imm },
            BasicInst::LoadAbsolute { kind, dst, target } => BasicInst::LoadAbsolute { kind, dst, target },
            BasicInst::StoreAbsolute { kind, src, target } => BasicInst::StoreAbsolute { kind, src, target },
            BasicInst::LoadAddress { dst, target } => BasicInst::LoadAddress { dst, target: map(target)? },
            BasicInst::LoadAddressIndirect { dst, target } => BasicInst::LoadAddressIndirect { dst, target: map(target)? },
            BasicInst::LoadIndirect { kind, dst, base, offset } => BasicInst::LoadIndirect { kind, dst, base, offset },
            BasicInst::StoreIndirect { kind, src, base, offset } => BasicInst::StoreIndirect { kind, src, base, offset },
            BasicInst::Reg { kind, dst, src } => BasicInst::Reg { kind, dst, src },
            BasicInst::RegReg { kind, dst, src1, src2 } => BasicInst::RegReg { kind, dst, src1, src2 },
            BasicInst::AnyAny { kind, dst, src1, src2 } => BasicInst::AnyAny { kind, dst, src1, src2 },
            BasicInst::Cmov { kind, dst, src, cond } => BasicInst::Cmov { kind, dst, src, cond },
            BasicInst::Ecalli { nth_import } => BasicInst::Ecalli { nth_import },
            BasicInst::Sbrk { dst, size } => BasicInst::Sbrk { dst, size },
            BasicInst::LoadHeapBase { dst } => BasicInst::LoadHeapBase { dst },
            BasicInst::Memset => BasicInst::Memset,
            BasicInst::Nop => BasicInst::Nop,
            BasicInst::Prologue { stack_space, regs } => BasicInst::Prologue { stack_space, regs },
            BasicInst::Epilogue { stack_space, regs } => BasicInst::Epilogue { stack_space, regs },
        })
    }
918
    /// Returns the targets this instruction references, as a pair of
    /// `(absolute data target, address target)`.
    ///
    /// `LoadAbsolute`/`StoreAbsolute` reference a concrete `SectionTarget`;
    /// `LoadAddress`/`LoadAddressIndirect` reference a generic `T` target;
    /// every other variant references nothing.
    fn target(&self) -> (Option<SectionTarget>, Option<T>)
    where
        T: Copy,
    {
        match self {
            BasicInst::LoadAbsolute { target, .. } | BasicInst::StoreAbsolute { target, .. } => (Some(*target), None),
            BasicInst::LoadAddress { target, .. } | BasicInst::LoadAddressIndirect { target, .. } => (None, Some(*target)),
            BasicInst::Nop
            | BasicInst::Prologue { .. }
            | BasicInst::Epilogue { .. }
            | BasicInst::LoadHeapBase { .. }
            | BasicInst::MoveReg { .. }
            | BasicInst::LoadImmediate { .. }
            | BasicInst::LoadImmediate64 { .. }
            | BasicInst::LoadIndirect { .. }
            | BasicInst::StoreIndirect { .. }
            | BasicInst::Reg { .. }
            | BasicInst::RegReg { .. }
            | BasicInst::AnyAny { .. }
            | BasicInst::Cmov { .. }
            | BasicInst::Sbrk { .. }
            | BasicInst::Memset
            | BasicInst::Ecalli { .. } => (None, None),
        }
    }
944}
945
/// A control-flow instruction terminating a basic block.
///
/// `T` is the type used to identify code targets (e.g. a section+offset or a
/// basic-block index, depending on the compilation stage).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum ControlInst<T> {
    /// An unconditional direct jump.
    Jump {
        target: T,
    },
    /// A direct call: jumps to `target` with the return address
    /// (`target_return`) stored in `ra`.
    Call {
        ra: Reg,
        target: T,
        target_return: T,
    },
    /// An unconditional indirect jump to `base + offset`.
    JumpIndirect {
        base: Reg,
        offset: i64,
    },
    /// An indirect call to `base + offset`, storing the return address in `ra`.
    CallIndirect {
        ra: Reg,
        base: Reg,
        offset: i64,
        target_return: T,
    },
    /// A conditional branch: goes to `target_true` if the comparison of `src1`
    /// and `src2` (per `kind`) holds, otherwise falls through to `target_false`.
    Branch {
        kind: BranchKind,
        src1: RegImm,
        src2: RegImm,
        target_true: T,
        target_false: T,
    },
    /// Control flow that could not be translated.
    Unimplemented,
}
975
impl<T> ControlInst<T> {
    /// Returns the mask of registers read by this control instruction.
    fn src_mask(&self) -> RegMask {
        match *self {
            ControlInst::Jump { .. } | ControlInst::Call { .. } | ControlInst::Unimplemented => RegMask::empty(),
            ControlInst::JumpIndirect { base, .. } | ControlInst::CallIndirect { base, .. } => RegMask::from(base),
            ControlInst::Branch { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2),
        }
    }

    /// Returns the mask of registers written by this control instruction.
    /// Only calls write anything: the return-address register.
    fn dst_mask(&self) -> RegMask {
        match *self {
            ControlInst::Jump { .. } | ControlInst::JumpIndirect { .. } | ControlInst::Branch { .. } | ControlInst::Unimplemented => {
                RegMask::empty()
            }
            ControlInst::Call { ra, .. } | ControlInst::CallIndirect { ra, .. } => RegMask::from(ra),
        }
    }

    /// Maps every `T` target of this instruction through `map`, propagating
    /// the first error returned by `map`.
    fn map_target<U, E>(self, map: impl Fn(T) -> Result<U, E>) -> Result<ControlInst<U>, E> {
        Ok(match self {
            ControlInst::Jump { target } => ControlInst::Jump { target: map(target)? },
            ControlInst::Call { ra, target, target_return } => ControlInst::Call {
                ra,
                target: map(target)?,
                target_return: map(target_return)?,
            },
            ControlInst::JumpIndirect { base, offset } => ControlInst::JumpIndirect { base, offset },
            ControlInst::CallIndirect {
                ra,
                base,
                offset,
                target_return,
            } => ControlInst::CallIndirect {
                ra,
                base,
                offset,
                target_return: map(target_return)?,
            },
            ControlInst::Branch {
                kind,
                src1,
                src2,
                target_true,
                target_false,
            } => ControlInst::Branch {
                kind,
                src1,
                src2,
                target_true: map(target_true)?,
                target_false: map(target_false)?,
            },
            ControlInst::Unimplemented => ControlInst::Unimplemented,
        })
    }

    /// Returns the statically-known code targets of this instruction
    /// (up to two). Indirect jumps have none.
    fn targets(&self) -> [Option<&T>; 2] {
        match self {
            ControlInst::Jump { target, .. } => [Some(target), None],
            ControlInst::Call { target, target_return, .. } => [Some(target), Some(target_return)],
            ControlInst::CallIndirect { target_return, .. } => [Some(target_return), None],
            ControlInst::Branch {
                target_true, target_false, ..
            } => [Some(target_true), Some(target_false)],
            ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => [None, None],
        }
    }

    /// Returns the target reached by "falling through": the not-taken branch
    /// target, or the return target of a call. `None` for unconditional
    /// jumps and `Unimplemented`.
    fn fallthrough_target(&self) -> Option<T>
    where
        T: Copy,
    {
        match self {
            ControlInst::Jump { .. } | ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => None,
            ControlInst::Branch { target_false: target, .. }
            | ControlInst::Call { target_return: target, .. }
            | ControlInst::CallIndirect { target_return: target, .. } => Some(*target),
        }
    }

    /// Mutable variant of [`Self::fallthrough_target`].
    fn fallthrough_target_mut(&mut self) -> Option<&mut T> {
        match self {
            ControlInst::Jump { .. } | ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => None,
            ControlInst::Branch { target_false: target, .. }
            | ControlInst::Call { target_return: target, .. }
            | ControlInst::CallIndirect { target_return: target, .. } => Some(target),
        }
    }
}
1064
/// Either a straight-line instruction or a block-terminating control
/// instruction; the union of [`BasicInst`] and [`ControlInst`].
#[derive(Clone, Debug)]
enum InstExt<BasicT, ControlT> {
    Basic(BasicInst<BasicT>),
    Control(ControlInst<ControlT>),
}
1070
impl<BasicT, ControlT> InstExt<BasicT, ControlT> {
    /// Convenience constructor for a no-op basic instruction.
    fn nop() -> Self {
        InstExt::Basic(BasicInst::Nop)
    }
}
1076
/// Wraps a basic instruction into [`InstExt::Basic`].
impl<BasicT, ControlT> From<BasicInst<BasicT>> for InstExt<BasicT, ControlT> {
    fn from(inst: BasicInst<BasicT>) -> Self {
        InstExt::Basic(inst)
    }
}
1082
/// Wraps a control instruction into [`InstExt::Control`].
impl<BasicT, ControlT> From<ControlInst<ControlT>> for InstExt<BasicT, ControlT> {
    fn from(inst: ControlInst<ControlT>) -> Self {
        InstExt::Control(inst)
    }
}
1088
/// A basic block: a straight-line run of instructions terminated by exactly
/// one control-flow instruction.
#[derive(Debug)]
struct BasicBlock<BasicT, ControlT> {
    // This block's own identifier.
    target: BlockTarget,
    // Where in the original ELF this block came from.
    source: Source,
    // The straight-line instructions, each paired with its source-location stack.
    ops: Vec<(SourceStack, BasicInst<BasicT>)>,
    // The control-flow instruction terminating the block.
    next: EndOfBlock<ControlT>,
    // Whether this block starts a function (as reported by the caller of `new`).
    is_function: bool,
    // Initialized to `false` by `new`; presumably set later for cold blocks — see callers.
    is_unlikely: bool,
}
1098
impl<BasicT, ControlT> BasicBlock<BasicT, ControlT> {
    /// Creates a new basic block. `is_unlikely` always starts out as `false`.
    fn new(
        target: BlockTarget,
        source: Source,
        ops: Vec<(SourceStack, BasicInst<BasicT>)>,
        next: EndOfBlock<ControlT>,
        is_function: bool,
    ) -> Self {
        Self {
            target,
            source,
            ops,
            next,
            is_function,
            is_unlikely: false,
        }
    }
}
1117
1118fn split_function_name(name: &str) -> (String, String) {
1119    let (with_hash, without_hash) = if let Ok(name) = rustc_demangle::try_demangle(name) {
1120        (name.to_string(), format!("{:#}", name))
1121    } else {
1122        (name.to_owned(), name.to_owned())
1123    };
1124
1125    // Ideally we'd parse the symbol into an actual AST and use that,
1126    // but that's a lot of work, so for now let's just do it like this.
1127    //
1128    // Here we want to split the symbol into two parts: the namespace, and the name + hash.
1129    // The idea being that multiple symbols most likely share the namespcae, allowing us to
1130    // deduplicate those strings in the output blob.
1131    //
1132    // For example, this symbol:
1133    //   _ZN5alloc7raw_vec19RawVec$LT$T$C$A$GT$7reserve21do_reserve_and_handle17hddecba91f804dbebE
1134    // can be demangled into these:
1135    //   with_hash    = "alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle::hddecba91f804dbeb"
1136    //   without_hash = "alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle"
1137    //
1138    // So what we want is to split it in two like this:
1139    //   prefix = "alloc::raw_vec::RawVec<T,A>::reserve"
1140    //   suffix = "do_reserve_and_handle::hddecba91f804dbeb"
1141
1142    if with_hash.contains("::") {
1143        let suffix_index = {
1144            let mut found = None;
1145            let mut depth = 0;
1146            let mut last = '\0';
1147            let mut index = without_hash.len();
1148            for ch in without_hash.chars().rev() {
1149                if ch == '>' {
1150                    depth += 1;
1151                } else if ch == '<' {
1152                    depth -= 1;
1153                } else if ch == ':' && depth == 0 && last == ':' {
1154                    found = Some(index + 1);
1155                    break;
1156                }
1157
1158                last = ch;
1159                index -= ch.len_utf8();
1160            }
1161
1162            found
1163        };
1164
1165        if let Some(suffix_index) = suffix_index {
1166            let prefix = &with_hash[..suffix_index - 2];
1167            let suffix = &with_hash[suffix_index..];
1168            return (prefix.to_owned(), suffix.to_owned());
1169        } else {
1170            log::warn!("Failed to split symbol: {:?}", with_hash);
1171        }
1172    }
1173
1174    (String::new(), with_hash)
1175}
1176
/// A chunk of the program's memory image: either a slice of an ELF section's
/// data, or zero-filled padding.
#[derive(Clone, Debug)]
enum DataRef {
    Section { section_index: SectionIndex, range: Range<usize> },
    Padding(usize),
}
1182
1183impl DataRef {
1184    fn size(&self) -> usize {
1185        match self {
1186            Self::Section { range, .. } => range.len(),
1187            Self::Padding(size) => *size,
1188        }
1189    }
1190}
1191
/// The computed memory layout of the program.
#[derive(Debug)]
struct MemoryConfig {
    // Chunks making up the read-only data image.
    ro_data: Vec<DataRef>,
    // Chunks making up the read-write data image.
    rw_data: Vec<DataRef>,
    // In-memory size of the read-only data, including trailing zero padding.
    ro_data_size: u32,
    // In-memory size of the read-write data, including BSS.
    rw_data_size: u32,
    // Minimum stack size, rounded up to a whole page.
    min_stack_size: u32,
    // Address at which the heap begins.
    heap_base: u32,
}
1201
/// Returns how many bytes of padding are needed to round `memory_end` up to a
/// multiple of `align`, or `None` if it is already aligned.
///
/// Like the underlying `%`, this panics if `align` is zero.
fn get_padding(memory_end: u64, align: u64) -> Option<u64> {
    match memory_end % align {
        0 => None,
        misalignment => Some(align - misalignment),
    }
}
1210
/// Lays out `sections` contiguously starting at `*current_address`, appending
/// the resulting data/padding chunks to `chunks` and recording each section's
/// assigned base address in `base_address_for_section`.
///
/// On return, `*current_address` is advanced past the laid-out sections,
/// rounded up to `VM_MAX_PAGE_SIZE`, plus one extra guard page. Returns the
/// total in-memory size of the laid-out data, *including* any trailing
/// zero-filled (BSS-like) padding that gets trimmed from `chunks`.
fn process_sections(
    elf: &Elf,
    current_address: &mut u64,
    chunks: &mut Vec<DataRef>,
    base_address_for_section: &mut HashMap<SectionIndex, u64>,
    sections: impl IntoIterator<Item = SectionIndex>,
) -> u64 {
    for section_index in sections {
        let section = elf.section_by_index(section_index);
        // A section's in-memory size must be at least as big as its file data.
        assert!(section.size() >= section.data().len() as u64);

        // Respect the section's alignment requirement.
        if let Some(padding) = get_padding(*current_address, section.align()) {
            *current_address += padding;
            chunks.push(DataRef::Padding(padding as usize));
        }

        let section_name = section.name();
        let section_base_address = *current_address;
        base_address_for_section.insert(section.index(), section_base_address);

        *current_address += section.size();
        if !section.data().is_empty() {
            chunks.push(DataRef::Section {
                section_index: section.index(),
                range: 0..section.data().len(),
            });
        }

        // Zero-initialized portion past the section's file data (e.g. BSS).
        let padding = section.size() - section.data().len() as u64;
        if padding > 0 {
            chunks.push(DataRef::Padding(padding.try_into().expect("overflow")))
        }

        log::trace!(
            "Found section: '{}', original range = 0x{:x}..0x{:x} (relocated to: 0x{:x}..0x{:x}), size = 0x{:x}/0x{:x}",
            section_name,
            section.original_address(),
            section.original_address() + section.size(),
            section_base_address,
            section_base_address + section.size(),
            section.data().len(),
            section.size(),
        );
    }

    // Measure *before* trimming, so the reported size still counts the
    // trailing padding; the padding itself doesn't need to be stored.
    let size_in_memory: u64 = chunks.iter().map(|chunk| chunk.size() as u64).sum();
    while let Some(DataRef::Padding(..)) = chunks.last() {
        chunks.pop();
    }

    *current_address = align_to_next_page_u64(u64::from(VM_MAX_PAGE_SIZE), *current_address).expect("overflow");
    // Add a guard page between this section and the next one.
    *current_address += u64::from(VM_MAX_PAGE_SIZE);

    size_in_memory
}
1267
/// Computes the program's memory layout from the ELF's data and BSS sections.
///
/// Assigns a base address to every section (recorded in
/// `base_address_for_section`), builds the read-only and read-write data
/// images, and determines the minimum stack size, which may be raised by u32
/// values found in the `sections_min_stack_size` sections. The layout is then
/// cross-checked against `MemoryMapBuilder` to obtain the heap base.
#[allow(clippy::too_many_arguments)]
fn extract_memory_config(
    elf: &Elf,
    sections_ro_data: &[SectionIndex],
    sections_rw_data: &[SectionIndex],
    sections_bss: &[SectionIndex],
    sections_min_stack_size: &[SectionIndex],
    base_address_for_section: &mut HashMap<SectionIndex, u64>,
    mut min_stack_size: u32,
) -> Result<MemoryConfig, ProgramFromElfError> {
    // Leave the zeroth page unmapped.
    let mut current_address = u64::from(VM_MAX_PAGE_SIZE);

    let mut ro_data = Vec::new();
    let mut rw_data = Vec::new();
    let ro_data_address = current_address;
    let ro_data_size = process_sections(
        elf,
        &mut current_address,
        &mut ro_data,
        base_address_for_section,
        sections_ro_data.iter().copied(),
    );
    let rw_data_address = current_address;
    let rw_data_size = process_sections(
        elf,
        &mut current_address,
        &mut rw_data,
        base_address_for_section,
        sections_rw_data.iter().copied().chain(sections_bss.iter().copied()),
    );

    // The guest can request a larger stack via dedicated sections containing u32 values.
    for &section_index in sections_min_stack_size {
        let section = elf.section_by_index(section_index);
        let data = section.data();
        if data.len() % 4 != 0 {
            return Err(ProgramFromElfError::other(format!("section '{}' has invalid size", section.name())));
        }

        for xs in data.chunks_exact(4) {
            let value = u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]);
            min_stack_size = core::cmp::max(min_stack_size, value);
        }
    }

    let min_stack_size =
        align_to_next_page_u32(VM_MIN_PAGE_SIZE, min_stack_size).ok_or(ProgramFromElfError::other("out of range size for the stack"))?;

    log::trace!("Configured minimum stack size: 0x{min_stack_size:x}");

    let ro_data_size = u32::try_from(ro_data_size).expect("overflow");
    let rw_data_size = u32::try_from(rw_data_size).expect("overflow");

    // Sanity check that the memory configuration is actually valid.
    let heap_base = {
        // The physically stored data can be smaller than the in-memory size
        // (trailing padding was trimmed by `process_sections`), never bigger.
        let rw_data_size_physical: u64 = rw_data.iter().map(|x| x.size() as u64).sum();
        let rw_data_size_physical = u32::try_from(rw_data_size_physical).expect("overflow");
        assert!(rw_data_size_physical <= rw_data_size);

        let config = match MemoryMapBuilder::new(VM_MAX_PAGE_SIZE)
            .ro_data_size(ro_data_size)
            .rw_data_size(rw_data_size)
            .stack_size(min_stack_size)
            .build()
        {
            Ok(config) => config,
            Err(error) => {
                return Err(ProgramFromElfError::other(error));
            }
        };

        // Our manual layout must agree with the canonical memory map.
        assert_eq!(u64::from(config.ro_data_address()), ro_data_address);
        assert_eq!(u64::from(config.rw_data_address()), rw_data_address);

        config.heap_base()
    };

    let memory_config = MemoryConfig {
        ro_data,
        rw_data,
        ro_data_size,
        rw_data_size,
        min_stack_size,
        heap_base,
    };

    Ok(memory_config)
}
1355
/// Metadata describing an extern (an import or an export), parsed from the
/// metadata blobs embedded in the guest ELF.
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
struct ExternMetadata {
    // Explicit index, if specified (only present in metadata version >= 2).
    index: Option<u32>,
    // The raw symbol name bytes.
    symbol: Vec<u8>,
    // Number of registers used to pass arguments.
    input_regs: u8,
    // Number of registers used to return values.
    output_regs: u8,
}
1363
/// A single exported entry point: the location of its code plus its metadata.
#[derive(Clone, PartialEq, Eq, Debug)]
struct Export {
    location: SectionTarget,
    metadata: ExternMetadata,
}
1369
1370fn extract_exports(
1371    elf: &Elf,
1372    relocations: &BTreeMap<SectionTarget, RelocationKind>,
1373    section: &Section,
1374) -> Result<Vec<Export>, ProgramFromElfError> {
1375    let mut b = polkavm_common::elf::Reader::from(section.data());
1376    let mut exports = Vec::new();
1377    loop {
1378        let Ok(version) = b.read_byte() else { break };
1379
1380        if version != 1 {
1381            return Err(ProgramFromElfError::other(format!(
1382                "failed to parse export metadata: unsupported export metadata version: {}",
1383                version
1384            )));
1385        }
1386
1387        let metadata = {
1388            let location = SectionTarget {
1389                section_index: section.index(),
1390                offset: b.offset() as u64,
1391            };
1392
1393            // Ignore the address as written; we'll just use the relocations instead.
1394            let address = if elf.is_64() { b.read_u64() } else { b.read_u32().map(u64::from) };
1395            let address = address.map_err(|error| ProgramFromElfError::other(format!("failed to parse export metadata: {}", error)))?;
1396
1397            let Some(relocation) = relocations.get(&location) else {
1398                return Err(ProgramFromElfError::other(format!(
1399                    "found an export without a relocation for a pointer to the metadata at {location} (found address = 0x{address:x})"
1400                )));
1401            };
1402
1403            let target = match relocation {
1404                RelocationKind::Abs {
1405                    target,
1406                    size: RelocationSize::U64,
1407                } if elf.is_64() => target,
1408                RelocationKind::Abs {
1409                    target,
1410                    size: RelocationSize::U32,
1411                } if !elf.is_64() => target,
1412                _ => {
1413                    return Err(ProgramFromElfError::other(format!(
1414                        "found an export with an unexpected relocation at {location}: {relocation:?}"
1415                    )));
1416                }
1417            };
1418
1419            parse_extern_metadata(elf, relocations, *target)?
1420        };
1421
1422        let location = SectionTarget {
1423            section_index: section.index(),
1424            offset: b.offset() as u64,
1425        };
1426
1427        // Ignore the address as written; we'll just use the relocations instead.
1428        let error = if elf.is_64() { b.read_u64().err() } else { b.read_u32().err() };
1429
1430        if let Some(error) = error {
1431            return Err(ProgramFromElfError::other(format!("failed to parse export metadata: {}", error)));
1432        }
1433
1434        let Some(relocation) = relocations.get(&location) else {
1435            return Err(ProgramFromElfError::other(format!(
1436                "found an export without a relocation for a pointer to the code at {location}"
1437            )));
1438        };
1439
1440        let target = match relocation {
1441            RelocationKind::Abs {
1442                target,
1443                size: RelocationSize::U64,
1444            } if elf.is_64() => target,
1445            RelocationKind::Abs {
1446                target,
1447                size: RelocationSize::U32,
1448            } if !elf.is_64() => target,
1449            _ => {
1450                return Err(ProgramFromElfError::other(format!(
1451                    "found an export with an unexpected relocation at {location}: {relocation:?}"
1452                )));
1453            }
1454        };
1455
1456        exports.push(Export {
1457            location: *target,
1458            metadata,
1459        });
1460    }
1461
1462    Ok(exports)
1463}
1464
/// A single imported host function, described by its extern metadata.
#[derive(Clone, Debug)]
struct Import {
    metadata: ExternMetadata,
}
1469
/// Lets `ExternMetadata` fields be accessed directly through an `Import`.
impl core::ops::Deref for Import {
    type Target = ExternMetadata;
    fn deref(&self) -> &Self::Target {
        &self.metadata
    }
}
1476
impl Import {
    /// Registers read by this import: the first `input_regs` argument
    /// registers, plus the stack pointer (always treated as an input).
    fn src(&'_ self) -> impl Iterator<Item = Reg> + '_ {
        assert!(self.metadata.input_regs as usize <= Reg::INPUT_REGS.len());
        Reg::INPUT_REGS
            .into_iter()
            .take(self.metadata.input_regs as usize)
            .chain(core::iter::once(Reg::SP))
    }

    /// Mask form of [`Self::src`].
    fn src_mask(&self) -> RegMask {
        let mut mask = RegMask::empty();
        for reg in self.src() {
            mask.insert(reg);
        }

        mask
    }

    /// Registers written by this import.
    ///
    /// NOTE(review): unlike `src`, this yields *all* temporary/argument
    /// registers rather than only the first `output_regs` entries of
    /// `Reg::OUTPUT_REGS` — presumably a deliberate, conservative clobber
    /// (the host call may scribble over any of them), but worth confirming.
    #[allow(clippy::unused_self)]
    fn dst(&self) -> impl Iterator<Item = Reg> {
        assert!(self.metadata.output_regs as usize <= Reg::OUTPUT_REGS.len());
        [Reg::T0, Reg::T1, Reg::T2, Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::A4, Reg::A5].into_iter()
    }

    /// Mask form of [`Self::dst`].
    fn dst_mask(&self) -> RegMask {
        let mut mask = RegMask::empty();
        for reg in self.dst() {
            mask.insert(reg);
        }

        mask
    }
}
1510
/// Parses a single extern (import/export) metadata blob located at `target`.
///
/// Blob layout (little-endian), as read below:
///   - version byte (must be 1 or 2)
///   - u32 flags (must be 0)
///   - u32 symbol length, then a pointer to the symbol (resolved via relocation)
///   - input register count byte, output register count byte
///   - (version >= 2 only) a "has index" byte followed by a u32 index
fn parse_extern_metadata_impl(
    elf: &Elf,
    relocations: &BTreeMap<SectionTarget, RelocationKind>,
    target: SectionTarget,
) -> Result<ExternMetadata, String> {
    let section = elf.section_by_index(target.section_index);
    let mut b = polkavm_common::elf::Reader::from(section.data());

    // Skip `sh_offset` bytes:
    let _ = b.read(target.offset as usize)?;

    let version = b.read_byte()?;
    if version != 1 && version != 2 {
        return Err(format!("unsupported extern metadata version: '{version}' (expected '1' or '2')"));
    }

    let flags = b.read_u32()?;
    let symbol_length = b.read_u32()?;
    // The symbol pointer lives at the current offset; look up its relocation.
    let Some(symbol_relocation) = relocations.get(&SectionTarget {
        section_index: section.index(),
        offset: b.offset() as u64,
    }) else {
        return Err("missing relocation for the symbol".into());
    };

    // Ignore the address as written; we'll just use the relocations instead.
    if elf.is_64() {
        b.read_u64()?;
    } else {
        b.read_u32()?;
    };

    // The relocation's size must match the ELF class.
    let symbol_location = match symbol_relocation {
        RelocationKind::Abs {
            target,
            size: RelocationSize::U64,
        } if elf.is_64() => target,
        RelocationKind::Abs {
            target,
            size: RelocationSize::U32,
        } if !elf.is_64() => target,
        _ => return Err(format!("unexpected relocation for the symbol: {symbol_relocation:?}")),
    };

    // Fetch the symbol bytes; `saturating_add` avoids an overflow panic on
    // a bogus offset/length pair — the range check fails instead.
    let Some(symbol) = elf
        .section_by_index(symbol_location.section_index)
        .data()
        .get(symbol_location.offset as usize..symbol_location.offset.saturating_add(u64::from(symbol_length)) as usize)
    else {
        return Err("symbol out of bounds".into());
    };

    let input_regs = b.read_byte()?;
    if input_regs as usize > Reg::INPUT_REGS.len() {
        return Err(format!("too many input registers: {input_regs}"));
    }

    let output_regs = b.read_byte()?;
    if output_regs as usize > Reg::OUTPUT_REGS.len() {
        return Err(format!("too many output registers: {output_regs}"));
    }

    // Version 2 added an optional explicit index; the u32 is always present
    // in the blob but only meaningful when the "has index" byte is set.
    let index = if version >= 2 {
        let has_index = b.read_byte()?;
        let index = b.read_u32()?;
        if has_index > 0 {
            Some(index)
        } else {
            None
        }
    } else {
        None
    };

    if flags != 0 {
        return Err(format!("found unsupported flags: 0x{flags:x}"));
    }

    Ok(ExternMetadata {
        index,
        symbol: symbol.to_owned(),
        input_regs,
        output_regs,
    })
}
1596
1597fn parse_extern_metadata(
1598    elf: &Elf,
1599    relocations: &BTreeMap<SectionTarget, RelocationKind>,
1600    target: SectionTarget,
1601) -> Result<ExternMetadata, ProgramFromElfError> {
1602    parse_extern_metadata_impl(elf, relocations, target)
1603        .map_err(|error| ProgramFromElfError::other(format!("failed to parse extern metadata: {}", error)))
1604}
1605
/// Validates the gathered imports and makes sure every one has an index.
///
/// Two modes are supported:
/// - If *any* import carries an explicit index then all of them must; imports
///   sharing an index are allowed only with identical prototypes, and holes in
///   the index space are plugged with dummy (empty-symbol) imports.
/// - Otherwise indexes are assigned automatically to the *used* imports, in
///   the order of their sorted symbols.
///
/// Imports sharing a symbol but differing in prototype are always an error.
fn check_imports_and_assign_indexes(imports: &mut Vec<Import>, used_imports: &HashSet<usize>) -> Result<(), ProgramFromElfError> {
    let mut import_by_symbol: HashMap<Vec<u8>, usize> = HashMap::new();
    for (nth_import, import) in imports.iter().enumerate() {
        if let Some(&old_nth_import) = import_by_symbol.get(&import.metadata.symbol) {
            let old_import = &imports[old_nth_import];
            // An exact duplicate is harmless; only conflicting prototypes are an error.
            if import.metadata == old_import.metadata {
                continue;
            }

            return Err(ProgramFromElfError::other(format!(
                "duplicate imports with the same symbol yet different prototype: {}",
                ProgramSymbol::new(&*import.metadata.symbol)
            )));
        }

        import_by_symbol.insert(import.metadata.symbol.clone(), nth_import);
    }

    if imports.iter().any(|import| import.metadata.index.is_some()) {
        // Explicit-index mode: every import must carry an index.
        let mut import_by_index: HashMap<u32, ExternMetadata> = HashMap::new();
        let mut max_index = 0;
        for import in &*imports {
            if let Some(index) = import.index {
                if let Some(old_metadata) = import_by_index.get(&index) {
                    if *old_metadata != import.metadata {
                        return Err(ProgramFromElfError::other(format!(
                            "duplicate imports with the same index yet different prototypes: {}, {}",
                            ProgramSymbol::new(&*old_metadata.symbol),
                            ProgramSymbol::new(&*import.metadata.symbol)
                        )));
                    }
                } else {
                    import_by_index.insert(index, import.metadata.clone());
                }

                max_index = core::cmp::max(max_index, index);
            } else {
                return Err(ProgramFromElfError::other(format!(
                    "import without a specified index: {}",
                    ProgramSymbol::new(&*import.metadata.symbol)
                )));
            }
        }

        // If there are any holes in the indexes then insert dummy imports.
        for index in 0..max_index {
            if !import_by_index.contains_key(&index) {
                imports.push(Import {
                    metadata: ExternMetadata {
                        index: Some(index),
                        symbol: Vec::new(),
                        input_regs: 0,
                        output_regs: 0,
                    },
                })
            }
        }
    } else {
        // Automatic mode: assign indexes to the used imports, sorted by
        // symbol so the numbering is deterministic.
        let mut ordered: Vec<_> = used_imports.iter().copied().collect();
        ordered.sort_by(|&a, &b| imports[a].metadata.symbol.cmp(&imports[b].metadata.symbol));

        for (assigned_index, &nth_import) in ordered.iter().enumerate() {
            imports[nth_import].metadata.index = Some(assigned_index as u32);
        }
    }

    for import in imports {
        log::debug!(
            "Import: '{}', index = {:?}, input regs = {}, output regs = {}",
            String::from_utf8_lossy(&import.metadata.symbol),
            import.metadata.index,
            import.metadata.input_regs,
            import.metadata.output_regs
        );
    }

    Ok(())
}
1684
/// Resolves a relocation into a `SectionTarget` (section index plus offset,
/// with the relocation's addend applied).
///
/// Returns `Ok(None)` for relocations that don't point anywhere meaningful:
/// `R_RISCV_NONE` and the zero-addend absolute relocations seen in debug info.
fn get_relocation_target(elf: &Elf, relocation: &crate::elf::Relocation) -> Result<Option<SectionTarget>, ProgramFromElfError> {
    match relocation.target() {
        object::RelocationTarget::Absolute => {
            if let object::RelocationFlags::Elf { r_type } = relocation.flags() {
                if r_type == object::elf::R_RISCV_NONE {
                    // GNU ld apparently turns R_RISCV_ALIGN and R_RISCV_RELAX into these.
                    return Ok(None);
                }
            }
            // Example of such relocation:
            //   Offset     Info    Type                Sym. Value  Symbol's Name + Addend
            //   00060839  00000001 R_RISCV_32                        0
            //
            // So far I've only seen these emitted for `.debug_info`.
            //
            // I'm not entirely sure what's the point of those, as they don't point to any symbol
            // and have an addend of zero.
            assert_eq!(relocation.addend(), 0);
            assert!(!relocation.has_implicit_addend());
            Ok(None)
        }
        object::RelocationTarget::Symbol(target_symbol_index) => {
            let target_symbol = elf
                .symbol_by_index(target_symbol_index)
                .map_err(|error| ProgramFromElfError::other(format!("failed to fetch relocation target: {}", error)))?;

            let SectionTarget { section_index, offset } = target_symbol.section_target()?;
            let section = elf.section_by_index(section_index);
            log::trace!(
                "Fetched relocation target: target section = \"{}\", target symbol = \"{}\" (#{}, 0x{:x}), symbol offset = 0x{:x} + 0x{:x}",
                section.name(),
                target_symbol.name().unwrap_or(""),
                target_symbol_index.0,
                target_symbol.original_address(),
                offset,
                relocation.addend(),
            );

            // The addend may be negative, so use a checked signed add.
            let Some(offset) = offset.checked_add_signed(relocation.addend()) else {
                return Err(ProgramFromElfError::other(
                    "failed to add addend to the symbol's offset due to overflow",
                ));
            };

            Ok(Some(SectionTarget {
                section_index: section.index(),
                offset,
            }))
        }
        _ => Err(ProgramFromElfError::other(format!(
            "unsupported target for relocation: {:?}",
            relocation
        ))),
    }
}
1740
/// Which min/max operation to emit; the `64` variants compare the full 64-bit
/// registers, the others compare 32-bit values.
enum MinMax {
    MaxSigned,
    MinSigned,
    MaxUnsigned,
    MinUnsigned,

    MaxSigned64,
    MinSigned64,
    MaxUnsigned64,
    MinUnsigned64,
}
1752
1753fn emit_minmax(
1754    kind: MinMax,
1755    dst: Reg,
1756    src1: Option<Reg>,
1757    src2: Option<Reg>,
1758    tmp: Reg,
1759    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1760) {
1761    // This is supposed to emit something like this:
1762    //   tmp = src1 ? src2
1763    //   dst = src1
1764    //   dst = src2 if tmp == 0
1765
1766    assert_ne!(dst, tmp);
1767    assert_ne!(Some(tmp), src1);
1768    assert_ne!(Some(tmp), src2);
1769    assert_ne!(Some(dst), src2);
1770
1771    let (cmp_src1, cmp_src2, cmp_kind) = match kind {
1772        MinMax::MinUnsigned => (src1, src2, AnyAnyKind::SetLessThanUnsigned32),
1773        MinMax::MaxUnsigned => (src2, src1, AnyAnyKind::SetLessThanUnsigned32),
1774        MinMax::MinSigned => (src1, src2, AnyAnyKind::SetLessThanSigned32),
1775        MinMax::MaxSigned => (src2, src1, AnyAnyKind::SetLessThanSigned32),
1776        MinMax::MinUnsigned64 => (src1, src2, AnyAnyKind::SetLessThanUnsigned64),
1777        MinMax::MaxUnsigned64 => (src2, src1, AnyAnyKind::SetLessThanUnsigned64),
1778        MinMax::MinSigned64 => (src1, src2, AnyAnyKind::SetLessThanSigned64),
1779        MinMax::MaxSigned64 => (src2, src1, AnyAnyKind::SetLessThanSigned64),
1780    };
1781
1782    emit(InstExt::Basic(BasicInst::AnyAny {
1783        kind: cmp_kind,
1784        dst: tmp,
1785        src1: cmp_src1.map_or(RegImm::Imm(0), RegImm::Reg),
1786        src2: cmp_src2.map_or(RegImm::Imm(0), RegImm::Reg),
1787    }));
1788
1789    if let Some(src1) = src1 {
1790        emit(InstExt::Basic(BasicInst::MoveReg { dst, src: src1 }));
1791    } else {
1792        emit(InstExt::Basic(BasicInst::LoadImmediate { dst: tmp, imm: 0 }));
1793    }
1794
1795    emit(InstExt::Basic(BasicInst::Cmov {
1796        kind: CmovKind::EqZero,
1797        dst,
1798        src: src2.map_or(RegImm::Imm(0), RegImm::Reg),
1799        cond: tmp,
1800    }));
1801}
1802
1803fn resolve_simple_zero_register_usage(
1804    kind: crate::riscv::RegRegKind,
1805    dst: Reg,
1806    src1: RReg,
1807    src2: RReg,
1808    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1809) -> bool {
1810    use crate::riscv::RegRegKind as K;
1811    if kind == K::OrInverted && src1 == RReg::Zero && src2 != RReg::Zero {
1812        emit(InstExt::Basic(BasicInst::AnyAny {
1813            kind: AnyAnyKind::Xor32,
1814            dst,
1815            src1: RegImm::Imm(!0),
1816            src2: cast_reg_any(src2).unwrap(),
1817        }));
1818        return true;
1819    }
1820
1821    if kind == K::Xnor && src1 == RReg::Zero && src2 != RReg::Zero {
1822        emit(InstExt::Basic(BasicInst::AnyAny {
1823            kind: AnyAnyKind::Xor32,
1824            dst,
1825            src1: RegImm::Imm(!0),
1826            src2: cast_reg_any(src2).unwrap(),
1827        }));
1828        return true;
1829    }
1830
1831    if kind == K::Xnor && src1 != RReg::Zero && src2 == RReg::Zero {
1832        emit(InstExt::Basic(BasicInst::AnyAny {
1833            kind: AnyAnyKind::Xor32,
1834            dst,
1835            src1: cast_reg_any(src1).unwrap(),
1836            src2: RegImm::Imm(!0),
1837        }));
1838        return true;
1839    }
1840
1841    if (kind == K::Minimum || kind == K::Maximum) && (src1 == RReg::Zero || src2 == RReg::Zero) {
1842        if src1 == RReg::Zero && src2 == RReg::Zero {
1843            emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
1844            return true;
1845        }
1846
1847        let tmp = Reg::E2;
1848        let src1 = cast_reg_any(src1).unwrap();
1849        let src2 = cast_reg_any(src2).unwrap();
1850        let (kind, cmp_src1, cmp_src2) = match kind {
1851            K::Minimum => (AnyAnyKind::SetLessThanSigned32, src1, src2),
1852            K::Maximum => (AnyAnyKind::SetLessThanSigned32, src2, src1),
1853            _ => unreachable!(),
1854        };
1855
1856        emit(InstExt::Basic(BasicInst::AnyAny {
1857            kind,
1858            dst: tmp,
1859            src1: cmp_src1,
1860            src2: cmp_src2,
1861        }));
1862
1863        match src1 {
1864            RegImm::Reg(src) => emit(InstExt::Basic(BasicInst::MoveReg { dst, src })),
1865            RegImm::Imm(imm) => emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm })),
1866        }
1867
1868        emit(InstExt::Basic(BasicInst::Cmov {
1869            kind: CmovKind::EqZero,
1870            dst,
1871            src: src2,
1872            cond: tmp,
1873        }));
1874
1875        return true;
1876    }
1877
1878    if matches!(kind, K::RotateLeft32AndSignExtend | K::RotateRight32AndSignExtend) && src1 != RReg::Zero && src2 == RReg::Zero {
1879        emit(InstExt::Basic(BasicInst::AnyAny {
1880            kind: AnyAnyKind::Add32AndSignExtend,
1881            dst,
1882            src1: cast_reg_any(src1).unwrap(),
1883            src2: RegImm::Imm(0),
1884        }));
1885        return true;
1886    }
1887
1888    false
1889}
1890
1891fn emit_or_combine_byte(
1892    location: SectionTarget,
1893    dst: Reg,
1894    src: Reg,
1895    rv64: bool,
1896    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1897) {
1898    let op_reg = dst;
1899    let cmp_reg = Reg::E1;
1900    let tmp_reg = Reg::E2;
1901    let mask_reg = if dst != src { src } else { Reg::E3 };
1902    let range = if rv64 { 0..64 } else { 0..32 };
1903
1904    log::warn!("Emulating orc.b at {:?} with an instruction sequence", location);
1905
1906    if dst != src {
1907        emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
1908    }
1909
1910    // Loop:
1911    // mov tmp, op
1912    // shl mask, 8
1913    // or tmp, mask
1914    // test op, mask
1915    // cmov.neq op, tmp
1916
1917    for iter in range.step_by(8) {
1918        emit(InstExt::Basic(BasicInst::MoveReg { dst: tmp_reg, src: op_reg }));
1919
1920        if iter == 0 {
1921            emit(InstExt::Basic(BasicInst::LoadImmediate { dst: mask_reg, imm: 0xff }));
1922        } else {
1923            emit(InstExt::Basic(BasicInst::AnyAny {
1924                kind: if rv64 {
1925                    AnyAnyKind::ShiftLogicalLeft64
1926                } else {
1927                    AnyAnyKind::ShiftLogicalLeft32
1928                },
1929                dst: mask_reg,
1930                src1: RegImm::Reg(mask_reg),
1931                src2: RegImm::Imm(8),
1932            }));
1933        }
1934
1935        emit(InstExt::Basic(BasicInst::AnyAny {
1936            kind: if rv64 { AnyAnyKind::Or64 } else { AnyAnyKind::Or32 },
1937            dst: tmp_reg,
1938            src1: RegImm::Reg(tmp_reg),
1939            src2: RegImm::Reg(mask_reg),
1940        }));
1941
1942        emit(InstExt::Basic(BasicInst::AnyAny {
1943            kind: if rv64 { AnyAnyKind::And64 } else { AnyAnyKind::And32 },
1944            dst: cmp_reg,
1945            src1: RegImm::Reg(op_reg),
1946            src2: RegImm::Reg(mask_reg),
1947        }));
1948
1949        emit(InstExt::Basic(BasicInst::Cmov {
1950            kind: CmovKind::NotEqZero,
1951            dst: op_reg,
1952            src: RegImm::Reg(tmp_reg),
1953            cond: cmp_reg,
1954        }));
1955    }
1956}
1957
1958fn convert_instruction(
1959    elf: &Elf,
1960    section: &Section,
1961    current_location: SectionTarget,
1962    instruction: Inst,
1963    instruction_size: u64,
1964    rv64: bool,
1965    mut emit: impl FnMut(InstExt<SectionTarget, SectionTarget>),
1966) -> Result<(), ProgramFromElfError> {
1967    match instruction {
1968        Inst::LoadUpperImmediate { dst, value } => {
1969            let Some(dst) = cast_reg_non_zero(dst)? else {
1970                emit(InstExt::nop());
1971                return Ok(());
1972            };
1973
1974            emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: value as i32 }));
1975            Ok(())
1976        }
1977        Inst::JumpAndLink { dst, target } => {
1978            let target = SectionTarget {
1979                section_index: section.index(),
1980                offset: current_location.offset.wrapping_add_signed(i64::from(target as i32)),
1981            };
1982
1983            if target.offset > section.size() {
1984                return Err(ProgramFromElfError::other("out of range JAL instruction"));
1985            }
1986
1987            let next = if let Some(dst) = cast_reg_non_zero(dst)? {
1988                let target_return = current_location.add(instruction_size);
1989                ControlInst::Call {
1990                    ra: dst,
1991                    target,
1992                    target_return,
1993                }
1994            } else {
1995                ControlInst::Jump { target }
1996            };
1997
1998            emit(InstExt::Control(next));
1999            Ok(())
2000        }
2001        Inst::Branch { kind, src1, src2, target } => {
2002            let src1 = cast_reg_any(src1)?;
2003            let src2 = cast_reg_any(src2)?;
2004
2005            let target_true = SectionTarget {
2006                section_index: section.index(),
2007                offset: current_location.offset.wrapping_add_signed(i64::from(target as i32)),
2008            };
2009
2010            if target_true.offset > section.size() {
2011                return Err(ProgramFromElfError::other("out of range unrelocated branch"));
2012            }
2013
2014            let target_false = current_location.add(instruction_size);
2015            emit(InstExt::Control(ControlInst::Branch {
2016                kind,
2017                src1,
2018                src2,
2019                target_true,
2020                target_false,
2021            }));
2022            Ok(())
2023        }
2024        Inst::JumpAndLinkRegister { dst, base, value } => {
2025            let Some(base) = cast_reg_non_zero(base)? else {
2026                return Err(ProgramFromElfError::other("found an unrelocated JALR instruction"));
2027            };
2028
2029            let next = if let Some(dst) = cast_reg_non_zero(dst)? {
2030                let target_return = current_location.add(instruction_size);
2031                ControlInst::CallIndirect {
2032                    ra: dst,
2033                    base,
2034                    offset: value.into(),
2035                    target_return,
2036                }
2037            } else {
2038                ControlInst::JumpIndirect {
2039                    base,
2040                    offset: value.into(),
2041                }
2042            };
2043
2044            emit(InstExt::Control(next));
2045            Ok(())
2046        }
2047        Inst::Unimplemented => {
2048            emit(InstExt::Control(ControlInst::Unimplemented));
2049            Ok(())
2050        }
2051        Inst::FenceI | Inst::Fence { .. } => {
2052            emit(InstExt::Basic(BasicInst::Nop));
2053            Ok(())
2054        }
2055        Inst::Load { kind, dst, base, offset } => {
2056            if dst == RReg::Zero && base == RReg::Zero && offset == 0 {
2057                // These are sometimes used as a poor man's trap.
2058                emit(InstExt::Control(ControlInst::Unimplemented));
2059                return Ok(());
2060            }
2061
2062            let Some(base) = cast_reg_non_zero(base)? else {
2063                return Err(ProgramFromElfError::other(format!(
2064                    "found an unrelocated absolute load at {}",
2065                    current_location.fmt_human_readable(elf)
2066                )));
2067            };
2068
2069            // LLVM riscv-enable-dead-defs pass may rewrite dst to the zero register.
2070            match cast_reg_non_zero(dst)? {
2071                Some(dst) => emit(InstExt::Basic(BasicInst::LoadIndirect { kind, dst, base, offset })),
2072                None => emit(InstExt::Basic(BasicInst::Nop)),
2073            }
2074
2075            Ok(())
2076        }
2077        Inst::Store { kind, src, base, offset } => {
2078            if src == RReg::Zero && base == RReg::Zero && offset == 0 {
2079                emit(InstExt::Control(ControlInst::Unimplemented));
2080                return Ok(());
2081            }
2082
2083            let Some(base) = cast_reg_non_zero(base)? else {
2084                return Err(ProgramFromElfError::other(format!(
2085                    "found an unrelocated absolute store at {}",
2086                    current_location.fmt_human_readable(elf)
2087                )));
2088            };
2089
2090            let src = cast_reg_any(src)?;
2091            emit(InstExt::Basic(BasicInst::StoreIndirect { kind, src, base, offset }));
2092            Ok(())
2093        }
2094        Inst::RegImm { kind, dst, src, imm } => {
2095            let Some(dst) = cast_reg_non_zero(dst)? else {
2096                emit(InstExt::nop());
2097                return Ok(());
2098            };
2099
2100            let src = cast_reg_any(src)?;
2101            let kind = match kind {
2102                RegImmKind::Add32 => AnyAnyKind::Add32,
2103                RegImmKind::Add32AndSignExtend => AnyAnyKind::Add32AndSignExtend,
2104                RegImmKind::Add64 => AnyAnyKind::Add64,
2105                RegImmKind::And32 => AnyAnyKind::And32,
2106                RegImmKind::And64 => AnyAnyKind::And64,
2107                RegImmKind::Or32 => AnyAnyKind::Or32,
2108                RegImmKind::Or64 => AnyAnyKind::Or64,
2109                RegImmKind::Xor32 => AnyAnyKind::Xor32,
2110                RegImmKind::Xor64 => AnyAnyKind::Xor64,
2111                RegImmKind::SetLessThanUnsigned32 => AnyAnyKind::SetLessThanUnsigned32,
2112                RegImmKind::SetLessThanUnsigned64 => AnyAnyKind::SetLessThanUnsigned64,
2113                RegImmKind::SetLessThanSigned32 => AnyAnyKind::SetLessThanSigned32,
2114                RegImmKind::SetLessThanSigned64 => AnyAnyKind::SetLessThanSigned64,
2115                RegImmKind::ShiftLogicalLeft32 => AnyAnyKind::ShiftLogicalLeft32,
2116                RegImmKind::ShiftLogicalLeft32AndSignExtend => AnyAnyKind::ShiftLogicalLeft32AndSignExtend,
2117                RegImmKind::ShiftLogicalLeft64 => AnyAnyKind::ShiftLogicalLeft64,
2118                RegImmKind::ShiftLogicalRight32 => AnyAnyKind::ShiftLogicalRight32,
2119                RegImmKind::ShiftLogicalRight32AndSignExtend => AnyAnyKind::ShiftLogicalRight32AndSignExtend,
2120                RegImmKind::ShiftLogicalRight64 => AnyAnyKind::ShiftLogicalRight64,
2121                RegImmKind::ShiftArithmeticRight32 => AnyAnyKind::ShiftArithmeticRight32,
2122                RegImmKind::ShiftArithmeticRight32AndSignExtend => AnyAnyKind::ShiftArithmeticRight32AndSignExtend,
2123                RegImmKind::ShiftArithmeticRight64 => AnyAnyKind::ShiftArithmeticRight64,
2124                RegImmKind::RotateRight32 => AnyAnyKind::RotateRight32,
2125                RegImmKind::RotateRight32AndSignExtend => AnyAnyKind::RotateRight32AndSignExtend,
2126                RegImmKind::RotateRight64 => AnyAnyKind::RotateRight64,
2127            };
2128
2129            match src {
2130                RegImm::Imm(0) => {
2131                    // The optimizer can take care of this later, but doing it early here is more efficient.
2132                    emit(InstExt::Basic(BasicInst::LoadImmediate {
2133                        dst,
2134                        imm: OperationKind::from(kind)
2135                            .apply_const(0, cast(imm).to_i64_sign_extend())
2136                            .try_into()
2137                            .expect("load immediate overflow"),
2138                    }));
2139                }
2140                RegImm::Reg(src) if imm == 0 && matches!(kind, AnyAnyKind::Add32 | AnyAnyKind::Add64) => {
2141                    emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
2142                }
2143                _ => {
2144                    emit(InstExt::Basic(BasicInst::AnyAny {
2145                        kind,
2146                        dst,
2147                        src1: src,
2148                        src2: imm.into(),
2149                    }));
2150                }
2151            }
2152
2153            Ok(())
2154        }
2155        Inst::Reg { kind, dst, src } => {
2156            let Some(dst) = cast_reg_non_zero(dst)? else {
2157                emit(InstExt::nop());
2158                return Ok(());
2159            };
2160
2161            use crate::riscv::RegKind as K;
2162
2163            let Some(src) = cast_reg_non_zero(src)? else {
2164                let imm = match kind {
2165                    K::CountLeadingZeroBits32 | K::CountTrailingZeroBits32 => 32,
2166                    K::CountLeadingZeroBits64 | K::CountTrailingZeroBits64 => 64,
2167                    K::CountSetBits32 | K::CountSetBits64 => 0,
2168                    K::ReverseByte => 0,
2169                    K::OrCombineByte => 0,
2170                    K::SignExtend8 | K::SignExtend16 | K::ZeroExtend16 => 0,
2171                };
2172
2173                emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm }));
2174                return Ok(());
2175            };
2176
2177            let kind = match kind {
2178                K::CountLeadingZeroBits32 => RegKind::CountLeadingZeroBits32,
2179                K::CountLeadingZeroBits64 => RegKind::CountLeadingZeroBits64,
2180                K::CountSetBits32 => RegKind::CountSetBits32,
2181                K::CountSetBits64 => RegKind::CountSetBits64,
2182                K::CountTrailingZeroBits32 => RegKind::CountTrailingZeroBits32,
2183                K::CountTrailingZeroBits64 => RegKind::CountTrailingZeroBits64,
2184                K::ReverseByte => RegKind::ReverseByte,
2185                K::SignExtend8 => RegKind::SignExtend8,
2186                K::SignExtend16 => RegKind::SignExtend16,
2187                K::ZeroExtend16 => RegKind::ZeroExtend16,
2188                K::OrCombineByte => {
2189                    emit_or_combine_byte(current_location, dst, src, rv64, &mut emit);
2190                    return Ok(());
2191                }
2192            };
2193
2194            emit(InstExt::Basic(BasicInst::Reg { kind, dst, src }));
2195
2196            Ok(())
2197        }
2198        Inst::RegReg { kind, dst, src1, src2 } => {
2199            let Some(dst) = cast_reg_non_zero(dst)? else {
2200                emit(InstExt::nop());
2201                return Ok(());
2202            };
2203
2204            macro_rules! anyany {
2205                ($kind:ident) => {
2206                    BasicInst::AnyAny {
2207                        kind: AnyAnyKind::$kind,
2208                        dst,
2209                        src1: cast_reg_any(src1)?,
2210                        src2: cast_reg_any(src2)?,
2211                    }
2212                };
2213            }
2214
2215            macro_rules! regreg {
2216                ($kind:ident) => {
2217                    match (cast_reg_non_zero(src1)?, cast_reg_non_zero(src2)?) {
2218                        (Some(src1), Some(src2)) => BasicInst::RegReg {
2219                            kind: RegRegKind::$kind,
2220                            dst,
2221                            src1,
2222                            src2,
2223                        },
2224                        (lhs, rhs) => {
2225                            let lhs = lhs
2226                                .map(|reg| RegValue::Reg {
2227                                    reg,
2228                                    direction: Direction::Input,
2229                                    source_block: BlockTarget::from_raw(0),
2230                                    bits_used: u64::MAX,
2231                                    addend: 0,
2232                                })
2233                                .unwrap_or(RegValue::Constant(0));
2234
2235                            let rhs = rhs
2236                                .map(|reg| RegValue::Reg {
2237                                    reg,
2238                                    direction: Direction::Input,
2239                                    source_block: BlockTarget::from_raw(0),
2240                                    bits_used: u64::MAX,
2241                                    addend: 0,
2242                                })
2243                                .unwrap_or(RegValue::Constant(0));
2244
2245                            match OperationKind::from(RegRegKind::$kind).apply(elf, lhs, rhs) {
2246                                Some(RegValue::Constant(imm)) => {
2247                                    let imm: i32 = imm.try_into().expect("immediate operand overflow");
2248                                    BasicInst::LoadImmediate { dst, imm }
2249                                }
2250                                Some(RegValue::Reg {
2251                                    reg,
2252                                    direction: Direction::Input,
2253                                    addend: 0,
2254                                    ..
2255                                }) => BasicInst::MoveReg { dst, src: reg },
2256                                _ => {
2257                                    return Err(ProgramFromElfError::other(format!(
2258                                        "found a {:?} instruction using a zero register",
2259                                        kind
2260                                    )))
2261                                }
2262                            }
2263                        }
2264                    }
2265                };
2266            }
2267
2268            if resolve_simple_zero_register_usage(kind, dst, src1, src2, &mut emit) {
2269                emit(InstExt::nop());
2270                return Ok(());
2271            };
2272
2273            use crate::riscv::RegRegKind as K;
2274            let instruction = match kind {
2275                K::Add32 => anyany!(Add32),
2276                K::Add32AndSignExtend => anyany!(Add32AndSignExtend),
2277                K::Add64 => anyany!(Add64),
2278                K::Sub32 => anyany!(Sub32),
2279                K::Sub32AndSignExtend => anyany!(Sub32AndSignExtend),
2280                K::Sub64 => anyany!(Sub64),
2281                K::And32 => anyany!(And32),
2282                K::And64 => anyany!(And64),
2283                K::Or32 => anyany!(Or32),
2284                K::Or64 => anyany!(Or64),
2285                K::Xor32 => anyany!(Xor32),
2286                K::Xor64 => anyany!(Xor64),
2287                K::SetLessThanUnsigned32 => anyany!(SetLessThanUnsigned32),
2288                K::SetLessThanUnsigned64 => anyany!(SetLessThanUnsigned64),
2289                K::SetLessThanSigned32 => anyany!(SetLessThanSigned32),
2290                K::SetLessThanSigned64 => anyany!(SetLessThanSigned64),
2291                K::ShiftLogicalLeft32 => anyany!(ShiftLogicalLeft32),
2292                K::ShiftLogicalLeft32AndSignExtend => anyany!(ShiftLogicalLeft32AndSignExtend),
2293                K::ShiftLogicalLeft64 => anyany!(ShiftLogicalLeft64),
2294                K::ShiftLogicalRight32 => anyany!(ShiftLogicalRight32),
2295                K::ShiftLogicalRight32AndSignExtend => anyany!(ShiftLogicalRight32AndSignExtend),
2296                K::ShiftLogicalRight64 => anyany!(ShiftLogicalRight64),
2297                K::ShiftArithmeticRight32 => anyany!(ShiftArithmeticRight32),
2298                K::ShiftArithmeticRight32AndSignExtend => anyany!(ShiftArithmeticRight32AndSignExtend),
2299                K::ShiftArithmeticRight64 => anyany!(ShiftArithmeticRight64),
2300                K::Mul32 => anyany!(Mul32),
2301                K::Mul32AndSignExtend => anyany!(Mul32AndSignExtend),
2302                K::Mul64 => anyany!(Mul64),
2303                K::MulUpperSignedSigned32 => regreg!(MulUpperSignedSigned32),
2304                K::MulUpperSignedSigned64 => regreg!(MulUpperSignedSigned64),
2305                K::MulUpperUnsignedUnsigned32 => regreg!(MulUpperUnsignedUnsigned32),
2306                K::MulUpperUnsignedUnsigned64 => regreg!(MulUpperUnsignedUnsigned64),
2307                K::MulUpperSignedUnsigned32 => regreg!(MulUpperSignedUnsigned32),
2308                K::MulUpperSignedUnsigned64 => regreg!(MulUpperSignedUnsigned64),
2309                K::Div32 => regreg!(Div32),
2310                K::Div32AndSignExtend => regreg!(Div32AndSignExtend),
2311                K::Div64 => regreg!(Div64),
2312                K::DivUnsigned32 => regreg!(DivUnsigned32),
2313                K::DivUnsigned32AndSignExtend => regreg!(DivUnsigned32AndSignExtend),
2314                K::DivUnsigned64 => regreg!(DivUnsigned64),
2315                K::Rem32 => regreg!(Rem32),
2316                K::Rem32AndSignExtend => regreg!(Rem32AndSignExtend),
2317                K::Rem64 => regreg!(Rem64),
2318                K::RemUnsigned32 => regreg!(RemUnsigned32),
2319                K::RemUnsigned32AndSignExtend => regreg!(RemUnsigned32AndSignExtend),
2320                K::RemUnsigned64 => regreg!(RemUnsigned64),
2321
2322                K::AndInverted => regreg!(AndInverted),
2323                K::OrInverted => regreg!(OrInverted),
2324                K::Xnor => regreg!(Xnor),
2325                K::Maximum => regreg!(Maximum),
2326                K::MaximumUnsigned => regreg!(MaximumUnsigned),
2327                K::Minimum => regreg!(Minimum),
2328                K::MinimumUnsigned => regreg!(MinimumUnsigned),
2329                K::RotateLeft32 => regreg!(RotateLeft32),
2330                K::RotateLeft32AndSignExtend => regreg!(RotateLeft32AndSignExtend),
2331                K::RotateLeft64 => regreg!(RotateLeft64),
2332                K::RotateRight32 => anyany!(RotateRight32),
2333                K::RotateRight32AndSignExtend => anyany!(RotateRight32AndSignExtend),
2334                K::RotateRight64 => anyany!(RotateRight64),
2335            };
2336
2337            emit(InstExt::Basic(instruction));
2338            Ok(())
2339        }
2340        Inst::AddUpperImmediateToPc { .. } => Err(ProgramFromElfError::other(format!(
2341            "found an unrelocated auipc instruction in {} ('{}') at address 0x{:x}; is the program compiled with relocations?",
2342            current_location,
2343            section.name(),
2344            section.original_address() + current_location.offset
2345        ))),
2346        Inst::Ecall => Err(ProgramFromElfError::other(
2347            "found a bare ecall instruction; those are not supported",
2348        )),
2349        Inst::Cmov { kind, dst, src, cond, .. } => {
2350            let Some(dst) = cast_reg_non_zero(dst)? else {
2351                emit(InstExt::Basic(BasicInst::Nop));
2352                return Ok(());
2353            };
2354
2355            match cast_reg_non_zero(cond)? {
2356                Some(cond) => {
2357                    emit(InstExt::Basic(BasicInst::Cmov {
2358                        kind,
2359                        dst,
2360                        src: cast_reg_any(src)?,
2361                        cond,
2362                    }));
2363                }
2364                None => match kind {
2365                    CmovKind::EqZero => {
2366                        if let Some(src) = cast_reg_non_zero(src)? {
2367                            emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
2368                        } else {
2369                            emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2370                        }
2371                    }
2372                    CmovKind::NotEqZero => {
2373                        emit(InstExt::nop());
2374                    }
2375                },
2376            };
2377
2378            Ok(())
2379        }
2380        Inst::MipsCmov {
2381            dst,
2382            src_true,
2383            src_false,
2384            cond,
2385            ..
2386        } => {
2387            let Some(dst) = cast_reg_non_zero(dst)? else {
2388                emit(InstExt::Basic(BasicInst::Nop));
2389                return Ok(());
2390            };
2391
2392            let src_true = cast_reg_non_zero(src_true)?;
2393            let src_false = cast_reg_non_zero(src_false)?;
2394
2395            let Some(cond) = cast_reg_non_zero(cond)? else {
2396                if let Some(src) = src_false {
2397                    emit(InstExt::Basic(BasicInst::MoveReg { dst, src }));
2398                } else {
2399                    emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2400                }
2401                return Ok(());
2402            };
2403
2404            if Some(dst) == src_true {
2405                emit(InstExt::Basic(BasicInst::Cmov {
2406                    kind: CmovKind::EqZero,
2407                    dst,
2408                    src: src_false.map(|reg| reg.into()).unwrap_or(RegImm::Imm(0)),
2409                    cond,
2410                }));
2411            } else if Some(dst) == src_false {
2412                emit(InstExt::Basic(BasicInst::Cmov {
2413                    kind: CmovKind::NotEqZero,
2414                    dst,
2415                    src: src_true.map(|reg| reg.into()).unwrap_or(RegImm::Imm(0)),
2416                    cond,
2417                }));
2418            } else if dst != cond {
2419                // `dst` is neither `src_true` nor `src_false` nor `cond`, so it's safe to do this.
2420                if let Some(src_false) = src_false {
2421                    emit(InstExt::Basic(BasicInst::MoveReg { dst, src: src_false }));
2422                } else {
2423                    emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2424                }
2425
2426                emit(InstExt::Basic(BasicInst::Cmov {
2427                    kind: CmovKind::NotEqZero,
2428                    dst,
2429                    src: src_true.map(|reg| reg.into()).unwrap_or(RegImm::Imm(0)),
2430                    cond,
2431                }));
2432            } else {
2433                // TODO: This is suboptimal.
2434                emit(InstExt::Basic(BasicInst::MoveReg { dst: Reg::E0, src: cond }));
2435                emit(InstExt::Basic(BasicInst::Cmov {
2436                    kind: CmovKind::NotEqZero,
2437                    dst,
2438                    src: src_true.map(|reg| reg.into()).unwrap_or(RegImm::Imm(0)),
2439                    cond,
2440                }));
2441                emit(InstExt::Basic(BasicInst::Cmov {
2442                    kind: CmovKind::EqZero,
2443                    dst,
2444                    src: src_false.map(|reg| reg.into()).unwrap_or(RegImm::Imm(0)),
2445                    cond: Reg::E0,
2446                }));
2447            }
2448
2449            Ok(())
2450        }
2451        Inst::LoadReserved32 { dst, src, .. } => {
2452            let Some(dst) = cast_reg_non_zero(dst)? else {
2453                return Err(ProgramFromElfError::other(
2454                    "found an atomic load with a zero register as the destination",
2455                ));
2456            };
2457
2458            let Some(src) = cast_reg_non_zero(src)? else {
2459                return Err(ProgramFromElfError::other(
2460                    "found an atomic load with a zero register as the source",
2461                ));
2462            };
2463
2464            emit(InstExt::Basic(BasicInst::LoadIndirect {
2465                kind: LoadKind::I32,
2466                dst,
2467                base: src,
2468                offset: 0,
2469            }));
2470
2471            Ok(())
2472        }
2473        Inst::LoadReserved64 { dst, src, .. } if rv64 => {
2474            let Some(dst) = cast_reg_non_zero(dst)? else {
2475                return Err(ProgramFromElfError::other(
2476                    "found an atomic load with a zero register as the destination",
2477                ));
2478            };
2479
2480            let Some(src) = cast_reg_non_zero(src)? else {
2481                return Err(ProgramFromElfError::other(
2482                    "found an atomic load with a zero register as the source",
2483                ));
2484            };
2485
2486            emit(InstExt::Basic(BasicInst::LoadIndirect {
2487                kind: LoadKind::U64,
2488                dst,
2489                base: src,
2490                offset: 0,
2491            }));
2492
2493            Ok(())
2494        }
2495        Inst::StoreConditional32 { src, addr, dst, .. } => {
2496            let Some(addr) = cast_reg_non_zero(addr)? else {
2497                return Err(ProgramFromElfError::other(
2498                    "found an atomic store with a zero register as the address",
2499                ));
2500            };
2501
2502            let src = cast_reg_any(src)?;
2503            emit(InstExt::Basic(BasicInst::StoreIndirect {
2504                kind: StoreKind::U32,
2505                src,
2506                base: addr,
2507                offset: 0,
2508            }));
2509
2510            if let Some(dst) = cast_reg_non_zero(dst)? {
2511                // The store always succeeds, so write zero here.
2512                emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2513            }
2514
2515            Ok(())
2516        }
2517        Inst::StoreConditional64 { src, addr, dst, .. } if rv64 => {
2518            let Some(addr) = cast_reg_non_zero(addr)? else {
2519                return Err(ProgramFromElfError::other(
2520                    "found an atomic store with a zero register as the address",
2521                ));
2522            };
2523
2524            let src = cast_reg_any(src)?;
2525            emit(InstExt::Basic(BasicInst::StoreIndirect {
2526                kind: StoreKind::U64,
2527                src,
2528                base: addr,
2529                offset: 0,
2530            }));
2531
2532            if let Some(dst) = cast_reg_non_zero(dst)? {
2533                // The store always succeeds, so write zero here.
2534                emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 }));
2535            }
2536
2537            Ok(())
2538        }
2539        Inst::LoadReserved64 { .. } | Inst::StoreConditional64 { .. } => {
2540            unreachable!("64-bit instruction in a 32-bit program: {instruction:?}");
2541        }
2542        Inst::Atomic {
2543            kind,
2544            dst: old_value,
2545            addr,
2546            src: operand,
2547            ..
2548        } => {
2549            let Some(addr) = cast_reg_non_zero(addr)? else {
2550                return Err(ProgramFromElfError::other(
2551                    "found an atomic operation with a zero register as the address",
2552                ));
2553            };
2554
2555            let is_64_bit = match kind {
2556                AtomicKind::Swap32
2557                | AtomicKind::Add32
2558                | AtomicKind::And32
2559                | AtomicKind::Or32
2560                | AtomicKind::Xor32
2561                | AtomicKind::MaxSigned32
2562                | AtomicKind::MinSigned32
2563                | AtomicKind::MaxUnsigned32
2564                | AtomicKind::MinUnsigned32 => false,
2565
2566                AtomicKind::Swap64
2567                | AtomicKind::Add64
2568                | AtomicKind::MaxSigned64
2569                | AtomicKind::MinSigned64
2570                | AtomicKind::MaxUnsigned64
2571                | AtomicKind::MinUnsigned64
2572                | AtomicKind::And64
2573                | AtomicKind::Or64
2574                | AtomicKind::Xor64 => true,
2575            };
2576
2577            let mut operand = cast_reg_non_zero(operand)?;
2578            if rv64 && !is_64_bit {
2579                // Zero-extend the operand to ignore any bits that might be there.
2580                if let Some(src) = operand {
2581                    emit(InstExt::Basic(BasicInst::AnyAny {
2582                        kind: AnyAnyKind::ShiftLogicalLeft64,
2583                        dst: Reg::E3,
2584                        src1: RegImm::Reg(src),
2585                        src2: RegImm::Imm(32),
2586                    }));
2587                    emit(InstExt::Basic(BasicInst::AnyAny {
2588                        kind: AnyAnyKind::ShiftArithmeticRight64,
2589                        dst: Reg::E3,
2590                        src1: RegImm::Reg(Reg::E3),
2591                        src2: RegImm::Imm(32),
2592                    }));
2593
2594                    operand = Some(Reg::E3);
2595                }
2596            }
2597            let operand_regimm = operand.map_or(RegImm::Imm(0), RegImm::Reg);
2598            let (old_value, new_value, output) = match cast_reg_non_zero(old_value)? {
2599                None => (Reg::E0, Reg::E0, None),
2600                Some(old_value) if old_value == addr || Some(old_value) == operand => (Reg::E0, Reg::E1, Some(old_value)),
2601                Some(old_value) => (old_value, Reg::E0, None),
2602            };
2603
2604            emit(InstExt::Basic(BasicInst::LoadIndirect {
2605                kind: if is_64_bit { LoadKind::U64 } else { LoadKind::I32 },
2606                dst: old_value,
2607                base: addr,
2608                offset: 0,
2609            }));
2610
2611            match kind {
2612                AtomicKind::Swap64 => {
2613                    emit(InstExt::Basic(BasicInst::AnyAny {
2614                        kind: AnyAnyKind::Add64,
2615                        dst: new_value,
2616                        src1: operand_regimm,
2617                        src2: RegImm::Imm(0),
2618                    }));
2619                }
2620                AtomicKind::Swap32 => {
2621                    emit(InstExt::Basic(BasicInst::AnyAny {
2622                        kind: AnyAnyKind::Add32,
2623                        dst: new_value,
2624                        src1: operand_regimm,
2625                        src2: RegImm::Imm(0),
2626                    }));
2627                }
2628                AtomicKind::Add64 => {
2629                    emit(InstExt::Basic(BasicInst::AnyAny {
2630                        kind: AnyAnyKind::Add64,
2631                        dst: new_value,
2632                        src1: old_value.into(),
2633                        src2: operand_regimm,
2634                    }));
2635                }
2636                AtomicKind::Add32 => {
2637                    emit(InstExt::Basic(BasicInst::AnyAny {
2638                        kind: AnyAnyKind::Add32,
2639                        dst: new_value,
2640                        src1: old_value.into(),
2641                        src2: operand_regimm,
2642                    }));
2643                }
2644                AtomicKind::And64 => {
2645                    emit(InstExt::Basic(BasicInst::AnyAny {
2646                        kind: AnyAnyKind::And64,
2647                        dst: new_value,
2648                        src1: old_value.into(),
2649                        src2: operand_regimm,
2650                    }));
2651                }
2652                AtomicKind::And32 => {
2653                    emit(InstExt::Basic(BasicInst::AnyAny {
2654                        kind: AnyAnyKind::And32,
2655                        dst: new_value,
2656                        src1: old_value.into(),
2657                        src2: operand_regimm,
2658                    }));
2659                }
2660                AtomicKind::Or64 => {
2661                    emit(InstExt::Basic(BasicInst::AnyAny {
2662                        kind: AnyAnyKind::Or64,
2663                        dst: new_value,
2664                        src1: old_value.into(),
2665                        src2: operand_regimm,
2666                    }));
2667                }
2668                AtomicKind::Or32 => {
2669                    emit(InstExt::Basic(BasicInst::AnyAny {
2670                        kind: AnyAnyKind::Or32,
2671                        dst: new_value,
2672                        src1: old_value.into(),
2673                        src2: operand_regimm,
2674                    }));
2675                }
2676                AtomicKind::Xor64 => {
2677                    emit(InstExt::Basic(BasicInst::AnyAny {
2678                        kind: AnyAnyKind::Xor64,
2679                        dst: new_value,
2680                        src1: old_value.into(),
2681                        src2: operand_regimm,
2682                    }));
2683                }
2684                AtomicKind::Xor32 => {
2685                    emit(InstExt::Basic(BasicInst::AnyAny {
2686                        kind: AnyAnyKind::Xor32,
2687                        dst: new_value,
2688                        src1: old_value.into(),
2689                        src2: operand_regimm,
2690                    }));
2691                }
2692                AtomicKind::MaxSigned32 => {
2693                    emit_minmax(MinMax::MaxSigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2694                }
2695                AtomicKind::MinSigned32 => {
2696                    emit_minmax(MinMax::MinSigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2697                }
2698                AtomicKind::MaxUnsigned32 => {
2699                    emit_minmax(MinMax::MaxUnsigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2700                }
2701                AtomicKind::MinUnsigned32 => {
2702                    emit_minmax(MinMax::MinUnsigned, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2703                }
2704                AtomicKind::MaxSigned64 => {
2705                    emit_minmax(MinMax::MaxSigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2706                }
2707                AtomicKind::MinSigned64 => {
2708                    emit_minmax(MinMax::MinSigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2709                }
2710                AtomicKind::MaxUnsigned64 => {
2711                    emit_minmax(MinMax::MaxUnsigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2712                }
2713                AtomicKind::MinUnsigned64 => {
2714                    emit_minmax(MinMax::MinUnsigned64, new_value, Some(old_value), operand, Reg::E2, &mut emit);
2715                }
2716            }
2717
2718            emit(InstExt::Basic(BasicInst::StoreIndirect {
2719                kind: if is_64_bit { StoreKind::U64 } else { StoreKind::U32 },
2720                src: new_value.into(),
2721                base: addr,
2722                offset: 0,
2723            }));
2724
2725            if let Some(output) = output {
2726                emit(InstExt::Basic(BasicInst::MoveReg {
2727                    dst: output,
2728                    src: old_value,
2729                }));
2730            }
2731
2732            Ok(())
2733        }
2734    }
2735}
2736
2737/// Read `n` bytes in `text` at `relative_offset` where `n` is
2738/// the length of the instruction at `relative_offset`.
2739///
2740/// # Panics
2741/// - Valid RISC-V instructions can be 2 or 4 bytes. Misaligned
2742///   `relative_offset` are considered an internal error.
2743/// - `relative_offset` is expected to be inbounds.
2744///
2745/// # Returns
2746/// The instruction length and the raw instruction.
2747fn read_instruction_bytes(text: &[u8], relative_offset: usize) -> (u64, u32) {
2748    assert!(
2749        relative_offset % VM_CODE_ADDRESS_ALIGNMENT as usize == 0,
2750        "internal error: misaligned instruction read: 0x{relative_offset:08x}"
2751    );
2752
2753    if Inst::is_compressed(text[relative_offset]) {
2754        (2, u32::from(u16::from_le_bytes([text[relative_offset], text[relative_offset + 1]])))
2755    } else {
2756        (
2757            4,
2758            u32::from_le_bytes([
2759                text[relative_offset],
2760                text[relative_offset + 1],
2761                text[relative_offset + 2],
2762                text[relative_offset + 3],
2763            ]),
2764        )
2765    }
2766}
2767
// `funct3` selectors for the PolkaVM-specific pseudoinstructions which are
// encoded under the RISC-V "custom-0" opcode space; these are matched against
// the unpacked instruction in `parse_code_section`.
const FUNC3_ECALLI: u32 = 0b000; // external call through import metadata
const FUNC3_SBRK: u32 = 0b001; // heap growth request
const FUNC3_MEMSET: u32 = 0b010; // memory fill
const FUNC3_HEAP_BASE: u32 = 0b011; // load the heap base address
2772
/// Attempts to recognize a function epilogue starting with `instruction`
/// (already decoded; `*pc` points just past it within `text`).
///
/// The matched shape is a run of SP-relative loads restoring saved registers,
/// terminated by the `addi` which pops the stack frame. On a match, a single
/// fused `BasicInst::Epilogue` is pushed to `output`, `*pc` is advanced past
/// everything consumed (the trailing `ret` is left for the caller), and
/// `Ok(true)` is returned. On a mismatch nothing is consumed or emitted and
/// `Ok(false)` is returned.
fn try_parse_epilogue(
    decoder_config: &DecoderConfig,
    elf: &Elf,
    mut instruction: Inst,
    pc: &mut usize,
    source: Source,
    text: &[u8],
    output: &mut Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
) -> Result<bool, ProgramFromElfError> {
    // For example, the pattern is:
    //    ld      ra,48(sp)
    //    ld      s0,40(sp)
    //    ld      s1,32(sp)
    //    addi    sp,sp,56
    //    ret

    // Pick the natively-sized add/load kinds and register width based on the ELF's bitness.
    let (native_add_kind, native_load_kind, native_reg_size) = if elf.is_64() {
        (RegImmKind::Add64, LoadKind::U64, 8)
    } else {
        (RegImmKind::Add32, LoadKind::U32, 4)
    };

    let mut stack_space = None;
    let mut current_pc = *pc;
    let mut regs = Vec::new();
    // Consume the run of SP-relative loads which restore the saved registers.
    loop {
        if current_pc >= text.len() {
            return Ok(false);
        }

        let Inst::Load {
            kind,
            dst,
            base: RReg::SP,
            offset,
        } = instruction
        else {
            break;
        };

        let Some(dst) = cast_reg_non_zero(dst)? else {
            return Ok(false);
        };

        // Restores must use the native width, must not clobber SP, and must
        // target a non-negative, register-size-aligned slot.
        if kind != native_load_kind || dst == Reg::SP || offset < 0 || (offset % native_reg_size) != 0 {
            return Ok(false);
        };

        // The first restore determines the frame size; every following restore
        // must come from the next slot down.
        if let Some(stack_space) = stack_space {
            if offset != stack_space - native_reg_size * (1 + regs.len() as i32) {
                return Ok(false);
            }
        } else {
            stack_space = Some(offset + native_reg_size);
        }

        // Decode the next instruction; the loop re-examines it at the top.
        let (next_inst_size, next_raw_inst) = read_instruction_bytes(text, current_pc);
        current_pc += next_inst_size as usize;

        if let Some(new_instruction) = Inst::decode(decoder_config, next_raw_inst) {
            instruction = new_instruction;
        } else {
            return Ok(false);
        }

        regs.push((cast(offset).to_unsigned(), dst));
    }

    // The run must be terminated by the `addi sp, sp, imm` which pops the frame.
    let Inst::RegImm {
        kind,
        dst: RReg::SP,
        src: RReg::SP,
        imm,
    } = instruction
    else {
        return Ok(false);
    };

    // If no registers were restored, the frame size is taken from the `addi` itself.
    let stack_space = stack_space.unwrap_or(imm);
    if kind != native_add_kind || imm <= 0 || imm != stack_space {
        return Ok(false);
    }

    // Commit: advance past everything consumed and emit a single fused instruction
    // covering the whole matched range.
    *pc = current_pc;

    output.push((
        Source {
            section_index: source.section_index,
            offset_range: AddressRange::from(source.offset_range.start..current_pc as u64),
        },
        InstExt::Basic(BasicInst::Epilogue {
            stack_space: cast(stack_space).to_unsigned(),
            regs,
        }),
    ));

    Ok(true)
}
2871
2872fn try_parse_prologue(
2873    decoder_config: &DecoderConfig,
2874    elf: &Elf,
2875    instruction: Inst,
2876    pc: &mut usize,
2877    source: Source,
2878    text: &[u8],
2879    output: &mut Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
2880) -> Result<bool, ProgramFromElfError> {
2881    let (native_add_kind, native_store_kind, native_reg_size) = if elf.is_64() {
2882        (RegImmKind::Add64, StoreKind::U64, 8)
2883    } else {
2884        (RegImmKind::Add32, StoreKind::U32, 4)
2885    };
2886    let Inst::RegImm {
2887        kind,
2888        dst: RReg::SP,
2889        src: RReg::SP,
2890        imm,
2891    } = instruction
2892    else {
2893        return Ok(false);
2894    };
2895    if kind != native_add_kind || imm >= 0 || (imm % native_reg_size) != 0 {
2896        return Ok(false);
2897    };
2898
2899    // For example, the pattern is:
2900    //    addi    sp,sp,-56
2901    //    sd      ra,48(sp)
2902    //    sd      s0,40(sp)
2903    //    sd      s1,32(sp)
2904    // Which would result in:
2905    //    Prologue { stack_space = 56, regs: [RA, S0, S1] }
2906
2907    let mut current_pc = *pc;
2908    let mut remaining = imm;
2909    let mut regs = Vec::new();
2910    while current_pc < text.len() && remaining < 0 {
2911        let (inst_size, raw_inst) = read_instruction_bytes(text, current_pc);
2912        let Some(Inst::Store {
2913            kind,
2914            src,
2915            base: RReg::SP,
2916            offset,
2917        }) = Inst::decode(decoder_config, raw_inst)
2918        else {
2919            break;
2920        };
2921
2922        let Some(src) = cast_reg_non_zero(src)? else {
2923            break;
2924        };
2925
2926        if src == Reg::SP || kind != native_store_kind || offset * -1 != (remaining + native_reg_size) {
2927            break;
2928        }
2929
2930        regs.push((cast(offset).to_unsigned(), src));
2931        current_pc += inst_size as usize;
2932        remaining += native_reg_size;
2933    }
2934
2935    *pc = current_pc;
2936    output.push((
2937        Source {
2938            section_index: source.section_index,
2939            offset_range: AddressRange::from(source.offset_range.start..current_pc as u64),
2940        },
2941        InstExt::Basic(BasicInst::Prologue {
2942            stack_space: cast(imm * -1).to_unsigned(),
2943            regs,
2944        }),
2945    ));
2946
2947    Ok(true)
2948}
2949
/// Decodes the contents of an executable ELF section into a flat list of
/// `(Source, InstExt)` pairs appended to `output`.
///
/// On top of plain RISC-V instructions this also recognizes:
/// - PolkaVM-specific pseudoinstructions encoded under the custom-0 opcode
///   (`ecalli`, `sbrk`, `memset` and "load heap base"),
/// - unrelocated `auipc` + `jalr` pairs (fused into a call) and
///   `auipc` + `addi` pairs (fused into a load-address),
/// - instruction overrides produced earlier from relocation processing
///   (consumed from `instruction_overrides`),
/// - prologue/epilogue patterns (only when `opt_level` is `Oexperimental`).
///
/// Imports discovered through `ecalli` metadata are deduplicated via
/// `metadata_to_nth_import` and appended to `imports`.
#[allow(clippy::too_many_arguments)]
fn parse_code_section(
    elf: &Elf,
    section: &Section,
    decoder_config: &DecoderConfig,
    relocations: &BTreeMap<SectionTarget, RelocationKind>,
    imports: &mut Vec<Import>,
    metadata_to_nth_import: &mut HashMap<ExternMetadata, usize>,
    instruction_overrides: &mut HashMap<SectionTarget, InstExt<SectionTarget, SectionTarget>>,
    output: &mut Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
    opt_level: OptLevel,
) -> Result<(), ProgramFromElfError> {
    let section_index = section.index();
    let section_name = section.name();
    let text = &section.data();

    // The section must be decodable in units of the VM's code address alignment.
    if text.len() % VM_CODE_ADDRESS_ALIGNMENT as usize != 0 {
        return Err(ProgramFromElfError::other(format!(
            "size of section '{section_name}' is not divisible by 2"
        )));
    }

    output.reserve(text.len() / 4);
    let mut relative_offset = 0;
    while relative_offset < text.len() {
        let current_location = SectionTarget {
            section_index: section.index(),
            offset: relative_offset.try_into().expect("overflow"),
        };

        let (inst_size, raw_inst) = read_instruction_bytes(text, relative_offset);

        // An `ecalli`: a 4-byte custom-0 marker followed by an inline pointer
        // (4 or 8 bytes depending on bitness) to the extern's metadata; the
        // pointer must carry an absolute relocation.
        if crate::riscv::R(raw_inst).unpack() == (crate::riscv::OPCODE_CUSTOM_0, FUNC3_ECALLI, 0, RReg::Zero, RReg::Zero, RReg::Zero) {
            let initial_offset = relative_offset as u64;
            let pointer_size = if elf.is_64() { 8 } else { 4 };

            // so (on 32-bit): 4 (ecalli) + 4 (pointer) = 8
            if relative_offset + pointer_size + 4 > text.len() {
                return Err(ProgramFromElfError::other("truncated ecalli instruction"));
            }

            let target_location = current_location.add(4);
            relative_offset += 4 + pointer_size;

            let Some(relocation) = relocations.get(&target_location) else {
                return Err(ProgramFromElfError::other(format!(
                    "found an external call without a relocation for a pointer to metadata at {target_location}"
                )));
            };

            // The relocation's size must match the ELF's pointer width.
            let metadata_location = match relocation {
                RelocationKind::Abs {
                    target,
                    size: RelocationSize::U64,
                } if elf.is_64() => target,
                RelocationKind::Abs {
                    target,
                    size: RelocationSize::U32,
                } if !elf.is_64() => target,
                _ => {
                    return Err(ProgramFromElfError::other(format!(
                        "found an external call with an unexpected relocation at {target_location}: {relocation:?}"
                    )));
                }
            };

            let metadata = parse_extern_metadata(elf, relocations, *metadata_location)?;

            // The same import can be inlined in multiple places, so deduplicate those here.
            let nth_import = match metadata_to_nth_import.entry(metadata) {
                std::collections::hash_map::Entry::Vacant(entry) => {
                    let nth_import = imports.len();
                    imports.push(Import {
                        metadata: entry.key().clone(),
                    });
                    entry.insert(nth_import);
                    nth_import
                }
                std::collections::hash_map::Entry::Occupied(entry) => *entry.get(),
            };

            output.push((
                Source {
                    section_index,
                    offset_range: AddressRange::from(initial_offset..relative_offset as u64),
                },
                InstExt::Basic(BasicInst::Ecalli { nth_import }),
            ));

            continue;
        }

        // An `sbrk dst, size` pseudoinstruction; both registers must be non-zero.
        if let (crate::riscv::OPCODE_CUSTOM_0, FUNC3_SBRK, 0, dst, size, RReg::Zero) = crate::riscv::R(raw_inst).unpack() {
            let Some(dst) = cast_reg_non_zero(dst)? else {
                return Err(ProgramFromElfError::other(
                    "found an 'sbrk' instruction with the zero register as the destination",
                ));
            };

            let Some(size) = cast_reg_non_zero(size)? else {
                return Err(ProgramFromElfError::other(
                    "found an 'sbrk' instruction with the zero register as the size",
                ));
            };

            output.push((
                Source {
                    section_index,
                    offset_range: (relative_offset as u64..relative_offset as u64 + inst_size).into(),
                },
                InstExt::Basic(BasicInst::Sbrk { dst, size }),
            ));

            relative_offset += inst_size as usize;
            continue;
        }

        // A `memset` pseudoinstruction; it carries no explicit register operands.
        if let (crate::riscv::OPCODE_CUSTOM_0, FUNC3_MEMSET, 0, RReg::Zero, RReg::Zero, RReg::Zero) = crate::riscv::R(raw_inst).unpack() {
            output.push((
                Source {
                    section_index,
                    offset_range: (relative_offset as u64..relative_offset as u64 + inst_size).into(),
                },
                InstExt::Basic(BasicInst::Memset),
            ));

            relative_offset += inst_size as usize;
            continue;
        }

        // A "load heap base" pseudoinstruction; becomes a NOP when the
        // destination is the zero register.
        if let (crate::riscv::OPCODE_CUSTOM_0, FUNC3_HEAP_BASE, 0, dst, RReg::Zero, RReg::Zero) = crate::riscv::R(raw_inst).unpack() {
            output.push((
                Source {
                    section_index,
                    offset_range: (relative_offset as u64..relative_offset as u64 + inst_size).into(),
                },
                match cast_reg_non_zero(dst)? {
                    Some(dst) => InstExt::Basic(BasicInst::LoadHeapBase { dst }),
                    None => InstExt::Basic(BasicInst::Nop),
                },
            ));

            relative_offset += inst_size as usize;
            continue;
        }

        let source = Source {
            section_index,
            offset_range: AddressRange::from(relative_offset as u64..relative_offset as u64 + inst_size),
        };

        relative_offset += inst_size as usize;

        let Some(original_inst) = Inst::decode(decoder_config, raw_inst) else {
            return Err(ProgramFromElfErrorKind::Other(
                format!(
                    "unsupported instruction in {} ('{}') at address 0x{:x}: 0x{:08x}",
                    current_location,
                    section.name(),
                    section.original_address() + current_location.offset,
                    raw_inst,
                )
                .into(),
            )
            .into());
        };

        // Relocation processing may have already decided what this location
        // should translate to; if so, use that verbatim.
        if let Some(inst) = instruction_overrides.remove(&current_location) {
            output.push((source, inst));
        } else {
            // For some reason (compiler bug?) *very rarely* we have those AUIPC instructions
            // without any relocation attached to them, so let's deal with them traditionally.
            if let Inst::AddUpperImmediateToPc {
                dst: base_upper,
                value: value_upper,
            } = original_inst
            {
                if relative_offset < text.len() {
                    let (next_inst_size, next_inst) = read_instruction_bytes(text, relative_offset);
                    let next_inst = Inst::decode(decoder_config, next_inst);

                    // `auipc` + `jalr` through the same register: fuse into a
                    // direct call when the target lands inside this section.
                    if let Some(Inst::JumpAndLinkRegister { dst: ra_dst, base, value }) = next_inst {
                        if base == ra_dst && base == base_upper {
                            if let Some(ra) = cast_reg_non_zero(ra_dst)? {
                                let offset = (relative_offset as i32 - next_inst_size as i32)
                                    .wrapping_add(value)
                                    .wrapping_add(value_upper as i32);
                                if offset >= 0 && offset < section.data().len() as i32 {
                                    output.push((
                                        source,
                                        InstExt::Control(ControlInst::Call {
                                            ra,
                                            target: SectionTarget {
                                                section_index,
                                                offset: u64::from(cast(offset).to_unsigned()),
                                            },
                                            target_return: current_location.add(inst_size + next_inst_size),
                                        }),
                                    ));

                                    relative_offset += next_inst_size as usize;
                                    continue;
                                }
                            }
                        }
                    // This can happen when a function grabs its own address (to e.g. seed an RNG).
                    } else if let Some(Inst::RegImm {
                        kind,
                        dst: add_dst,
                        src: add_src,
                        imm: value_lower,
                    }) = next_inst
                    {
                        if base_upper == add_src
                            && ((elf.is_64() && kind == RegImmKind::Add64) || (!elf.is_64() && kind == RegImmKind::Add32))
                        {
                            // Reconstruct the full PC-relative offset from the
                            // upper (auipc) and lower (addi) immediates.
                            let offset = value_upper.wrapping_add(cast(value_lower).to_unsigned());
                            let offset = cast(offset).to_signed();
                            let offset = cast(offset).to_i64_sign_extend();
                            let offset = current_location.offset.wrapping_add_signed(offset);
                            if offset >= section.size() {
                                return Err(ProgramFromElfError::other(format!(
                                    "found an unrelocated auipc instruction in {} ('{}') at address 0x{:x} with an oversized offset (offset = {}, section size = {})",
                                    current_location,
                                    section.name(),
                                    section.original_address() + current_location.offset,
                                    offset,
                                    section.size(),
                                )));
                            }

                            if let Some(dst) = cast_reg_non_zero(add_dst)? {
                                output.push((
                                    source,
                                    InstExt::Basic(BasicInst::LoadAddress {
                                        dst,
                                        target: SectionTarget {
                                            section_index: section.index(),
                                            offset,
                                        },
                                    }),
                                ));
                            }

                            // The `auipc` also wrote its own destination; when that
                            // register differs from the `addi`'s destination, rebuild
                            // its value as `dst - offset` from the just-loaded address.
                            if base_upper != add_dst {
                                if let Some(base_upper) = cast_reg_non_zero(base_upper)? {
                                    let Some(dst) = cast_reg_non_zero(add_dst)? else {
                                        return Err(ProgramFromElfError::other(format!(
                                            "found an unrelocated auipc instruction in {} ('{}') at address 0x{:x}: unimplemented: destination register is zero",
                                            current_location,
                                            section.name(),
                                            section.original_address() + current_location.offset,
                                        )));
                                    };
                                    let Ok(offset) = offset.try_into() else {
                                        return Err(ProgramFromElfError::other(format!(
                                            "found an unrelocated auipc instruction in {} ('{}') at address 0x{:x}: offset doesn't fit in 32-bits",
                                            current_location,
                                            section.name(),
                                            section.original_address() + current_location.offset,
                                        )));
                                    };
                                    output.push((
                                        source,
                                        InstExt::Basic(BasicInst::AnyAny {
                                            kind: if elf.is_64() { AnyAnyKind::Sub64 } else { AnyAnyKind::Sub32 },
                                            dst: base_upper,
                                            src1: RegImm::Reg(dst),
                                            src2: RegImm::Imm(offset),
                                        }),
                                    ))
                                }
                            }

                            relative_offset += next_inst_size as usize;
                            continue;
                        }
                    }
                }
            }

            // Try to fuse function prologues/epilogues into single instructions.
            if matches!(opt_level, OptLevel::Oexperimental) {
                if try_parse_prologue(decoder_config, elf, original_inst, &mut relative_offset, source, text, output)? {
                    continue;
                }

                if try_parse_epilogue(decoder_config, elf, original_inst, &mut relative_offset, source, text, output)? {
                    continue;
                }
            }

            let original_length = output.len();
            convert_instruction(elf, section, current_location, original_inst, inst_size, elf.is_64(), |inst| {
                output.push((source, inst));
            })?;

            // We need to always emit at least one instruction (even if it's a NOP) to handle potential jumps.
            assert_ne!(
                output.len(),
                original_length,
                "internal error: no instructions were emitted for instruction {original_inst:?} in section {section_name}"
            );
        }
    }

    Ok(())
}
3257
/// Slices a flat, ordered instruction stream into basic blocks.
///
/// A block starts at every jump target and ends at every control instruction;
/// when a jump target lands in the middle of a run of basic instructions the
/// current block is terminated early with an explicit fallthrough `Jump`.
/// Instructions which no block start can reach are dropped as dead code.
/// Returns an error if the stream doesn't end with a control instruction.
fn split_code_into_basic_blocks(
    elf: &Elf,
    section_to_function_name: &BTreeMap<SectionTarget, String>,
    jump_targets: &HashSet<SectionTarget>,
    instructions: Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
) -> Result<Vec<BasicBlock<SectionTarget, SectionTarget>>, ProgramFromElfError> {
    // `elf` is only read on the (non-test) logging path below.
    #[cfg(test)]
    let _ = elf;

    let mut blocks: Vec<BasicBlock<SectionTarget, SectionTarget>> = Vec::new();
    // Basic instructions accumulated for the block currently being built.
    let mut current_block: Vec<(SourceStack, BasicInst<SectionTarget>)> = Vec::new();
    // `Some((section, offset))` while we're inside a reachable block; `None` while skipping dead code.
    let mut block_start_opt = None;
    // Source of the previously pushed instruction; used to group multiple
    // emitted instructions that came from a single original RISC-V instruction.
    let mut last_source_in_block = None;
    #[cfg(not(test))]
    let mut current_symbol = "";
    for (source, op) in instructions {
        // TODO: This panics because we use a dummy ELF in tests; fix it.
        #[cfg(not(test))]
        {
            if let Some(name) = section_to_function_name.get(&source.begin()) {
                current_symbol = name;
            }
            log::trace!(
                "Instruction at {source} (0x{:x}) \"{current_symbol}\": {op:?}",
                elf.section_by_index(source.section_index).original_address() + source.offset_range.start
            );
        }

        if let Some(last_source_in_block) = last_source_in_block {
            // Handle the case where we've emitted multiple instructions from a single RISC-V instruction.
            if source == last_source_in_block {
                let InstExt::Basic(instruction) = op else { unreachable!() };
                current_block.push((source.into(), instruction));
                continue;
            }
        }

        assert!(source.offset_range.start < source.offset_range.end);

        let is_jump_target = jump_targets.contains(&source.begin());
        let (block_section, block_start) = if !is_jump_target {
            // Make sure nothing wants to jump into the middle of this instruction.
            assert!((source.offset_range.start..source.offset_range.end)
                .step_by(2)
                .skip(1)
                .all(|offset| !jump_targets.contains(&SectionTarget {
                    section_index: source.section_index,
                    offset
                })));

            if let Some((block_section, block_start)) = block_start_opt {
                // We're in a block that's reachable by a jump.
                (block_section, block_start)
            } else {
                // Nothing can possibly jump here, so just skip this instruction.
                log::trace!("Skipping dead instruction at {}: {:?}", source.begin(), op);
                continue;
            }
        } else {
            // Control flow can jump to this instruction.
            if let Some((block_section, block_start)) = block_start_opt.take() {
                // End the current basic block to prevent a jump into the middle of it.
                if !current_block.is_empty() {
                    let block_index = BlockTarget::from_raw(blocks.len());
                    let block_source = Source {
                        section_index: block_section,
                        offset_range: (block_start..source.offset_range.start).into(),
                    };

                    // The fallthrough jump is attributed to the span from the
                    // last instruction's start up to the jump target.
                    let last_instruction_source = current_block.last().unwrap().0.as_slice()[0];
                    assert_eq!(last_instruction_source.section_index, block_section);

                    let end_of_block_source = Source {
                        section_index: block_section,
                        offset_range: (last_instruction_source.offset_range.start..source.offset_range.start).into(),
                    };

                    assert!(block_source.offset_range.start < block_source.offset_range.end);
                    assert!(end_of_block_source.offset_range.start < end_of_block_source.offset_range.end);

                    log::trace!("Emitting block (due to a potential jump): {}", block_source.begin());
                    blocks.push(BasicBlock::new(
                        block_index,
                        block_source,
                        core::mem::take(&mut current_block),
                        EndOfBlock {
                            source: end_of_block_source.into(),
                            instruction: ControlInst::Jump { target: source.begin() },
                        },
                        section_to_function_name.contains_key(&block_source.begin()),
                    ));
                }
            }

            // Start a fresh block at this jump target.
            block_start_opt = Some((source.section_index, source.offset_range.start));
            (source.section_index, source.offset_range.start)
        };

        match op {
            InstExt::Control(instruction) => {
                last_source_in_block = None;
                block_start_opt = None;

                let block_index = BlockTarget::from_raw(blocks.len());
                let block_source = Source {
                    section_index: block_section,
                    offset_range: (block_start..source.offset_range.end).into(),
                };

                log::trace!("Emitting block (due to a control instruction): {}", block_source.begin());
                blocks.push(BasicBlock::new(
                    block_index,
                    block_source,
                    core::mem::take(&mut current_block),
                    EndOfBlock {
                        source: source.into(),
                        instruction,
                    },
                    section_to_function_name.contains_key(&block_source.begin()),
                ));

                if let ControlInst::Branch { target_false, .. } = instruction {
                    // A branch falls through to the next instruction, so the
                    // next block implicitly starts right after it.
                    if !cfg!(test) {
                        if source.section_index != target_false.section_index {
                            return Err(ProgramFromElfError::other("found a branch with a fallthrough to another section"));
                        }
                        assert_eq!(source.offset_range.end, target_false.offset);
                    }
                    block_start_opt = Some((block_section, source.offset_range.end));
                }
            }
            InstExt::Basic(instruction) => {
                last_source_in_block = Some(source);
                current_block.push((source.into(), instruction));
            }
        }
    }

    if !current_block.is_empty() {
        return Err(ProgramFromElfError::other(
            "code doesn't end with a control-flow affecting instruction",
        ));
    }

    Ok(blocks)
}
3404
3405fn build_section_to_block_map(
3406    blocks: &[BasicBlock<SectionTarget, SectionTarget>],
3407) -> Result<HashMap<SectionTarget, BlockTarget>, ProgramFromElfError> {
3408    let mut section_to_block = HashMap::new();
3409    for (block_index, block) in blocks.iter().enumerate() {
3410        let section_target = block.source.begin();
3411        let block_target = BlockTarget::from_raw(block_index);
3412        if section_to_block.insert(section_target, block_target).is_some() {
3413            return Err(ProgramFromElfError::other("found two or more basic blocks with the same location"));
3414        }
3415    }
3416
3417    Ok(section_to_block)
3418}
3419
3420fn resolve_basic_block_references(
3421    data_sections_set: &HashSet<SectionIndex>,
3422    section_to_block: &HashMap<SectionTarget, BlockTarget>,
3423    blocks: &[BasicBlock<SectionTarget, SectionTarget>],
3424) -> Result<Vec<BasicBlock<AnyTarget, BlockTarget>>, ProgramFromElfError> {
3425    let mut output = Vec::with_capacity(blocks.len());
3426    for block in blocks {
3427        let mut ops = Vec::with_capacity(block.ops.len());
3428        for (source, op) in &block.ops {
3429            let map = |target: SectionTarget| {
3430                if data_sections_set.contains(&target.section_index) {
3431                    Ok(AnyTarget::Data(target))
3432                } else if let Some(&target) = section_to_block.get(&target) {
3433                    Ok(AnyTarget::Code(target))
3434                } else {
3435                    return Err(ProgramFromElfError::other(format!(
3436                        "found basic instruction which doesn't point to a data section nor resolve to any basic block: {source:?}, {op:?}",
3437                    )));
3438                }
3439            };
3440
3441            let op = op.clone().map_target(map)?;
3442            ops.push((source.clone(), op));
3443        }
3444
3445        let Ok(next) = block
3446            .next
3447            .clone()
3448            .map_target(|section_target| section_to_block.get(&section_target).copied().ok_or(()))
3449        else {
3450            return Err(ProgramFromElfError::other(format!(
3451                "found control instruction at the end of block at {block_source} whose target doesn't resolve to any basic block: {next:?}",
3452                block_source = block.source,
3453                next = block.next.instruction,
3454            )));
3455        };
3456
3457        output.push(BasicBlock::new(block.target, block.source, ops, next, block.is_function));
3458    }
3459
3460    Ok(output)
3461}
3462
/// Mark-and-sweep garbage collection over the reachability graph.
///
/// Marks everything transitively reachable from the roots (code/data which is
/// always reachable or exported), then prunes every unmarked code and data
/// entry — along with any cross-references pointing at pruned entries — from
/// `reachability_graph`. Returns `true` if anything was removed.
fn garbage_collect_reachability(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>], reachability_graph: &mut ReachabilityGraph) -> bool {
    // Seed the worklists with the GC roots.
    let mut queue_code = VecSet::new();
    let mut queue_data = VecSet::new();
    for (block_target, reachability) in &reachability_graph.for_code {
        if reachability.always_reachable_or_exported() {
            queue_code.push(*block_target);
        }
    }

    for (data_target, reachability) in &reachability_graph.for_data {
        if reachability.always_reachable_or_exported() {
            queue_data.push(*data_target);
        }
    }

    // Mark phase: flood-fill everything reachable from the roots.
    // (`pop_unique` yields each item at most once, so this terminates.)
    while !queue_code.is_empty() || !queue_data.is_empty() {
        while let Some(block_target) = queue_code.pop_unique() {
            each_reference(&all_blocks[block_target.index()], |ext| match ext {
                ExtRef::Jump(target) | ExtRef::Address(target) => queue_code.push(target),
                ExtRef::DataAddress(target) => queue_data.push(target),
            });
        }

        while let Some(data_target) = queue_data.pop_unique() {
            if let Some(list) = reachability_graph.code_references_in_data_section.get(&data_target) {
                for &target in list {
                    queue_code.push(target);
                }
            }

            if let Some(list) = reachability_graph.data_references_in_data_section.get(&data_target) {
                for &target in list {
                    queue_data.push(target);
                }
            }
        }
    }

    // Everything that ever entered a queue is live.
    let set_code = queue_code.into_set();
    let set_data = queue_data.into_set();
    if set_code.len() == reachability_graph.for_code.len() && set_data.len() == reachability_graph.for_data.len() {
        // Nothing was collected.
        return false;
    }

    // Sweep phase: drop dead entries and scrub references to them from the
    // entries which survive.
    log::debug!(
        "Code reachability garbage collection: {} -> {}",
        reachability_graph.for_code.len(),
        set_code.len()
    );
    reachability_graph.for_code.retain(|block_target, reachability| {
        reachability.reachable_from.retain(|inner_key| set_code.contains(inner_key));
        reachability.address_taken_in.retain(|inner_key| set_code.contains(inner_key));
        reachability.referenced_by_data.retain(|inner_key| set_data.contains(inner_key));
        if !set_code.contains(block_target) {
            // Roots must always be live, so a collected block cannot be one.
            assert!(!reachability.always_reachable);
            log::trace!("  Garbage collected: {block_target:?}");
            false
        } else {
            true
        }
    });

    assert_eq!(reachability_graph.for_code.len(), set_code.len());

    log::debug!(
        "Data reachability garbage collection: {} -> {}",
        reachability_graph.for_data.len(),
        set_data.len()
    );
    reachability_graph.for_data.retain(|data_target, reachability| {
        // Data is never "jumped to", so this list is expected to be empty.
        assert!(reachability.reachable_from.is_empty());
        reachability.address_taken_in.retain(|inner_key| set_code.contains(inner_key));
        reachability.referenced_by_data.retain(|inner_key| set_data.contains(inner_key));
        if !set_data.contains(data_target) {
            assert!(!reachability.always_reachable);
            log::trace!("  Garbage collected: {data_target:?}");
            false
        } else {
            true
        }
    });

    // Keep the per-section reference lists consistent with the swept sets.
    reachability_graph.code_references_in_data_section.retain(|data_target, list| {
        if !set_data.contains(data_target) {
            false
        } else {
            assert!(list.iter().all(|block_target| set_code.contains(block_target)));
            true
        }
    });

    reachability_graph.data_references_in_data_section.retain(|data_target, list| {
        if !set_data.contains(data_target) {
            false
        } else {
            assert!(list.iter().all(|next_data_target| set_data.contains(next_data_target)));
            true
        }
    });

    assert_eq!(reachability_graph.for_data.len(), set_data.len());
    true
}
3566
/// Detaches the (already unreachable) code block `current` from the
/// reachability graph.
///
/// Every target referenced by `current` loses its back-reference; targets
/// which become unreachable as a result are pushed onto `queue_code` /
/// `queue_data` for the caller to remove next, while targets which stay
/// reachable are pushed onto `optimize_queue` (if given) for re-optimization.
fn remove_unreachable_code_impl(
    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
    reachability_graph: &mut ReachabilityGraph,
    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
    queue_code: &mut VecSet<BlockTarget>,
    queue_data: &mut VecSet<SectionIndex>,
    current: BlockTarget,
) {
    // The caller must have already established that this block is dead.
    assert!(reachability_graph.for_code.get(&current).unwrap().is_unreachable());
    log::trace!("Removing {current:?} from the graph...");

    each_reference(&all_blocks[current.index()], |ext| match ext {
        ExtRef::Jump(target) => {
            log::trace!("{target:?} is not reachable from {current:?} anymore");
            let reachability = reachability_graph.for_code.get_mut(&target).unwrap();
            reachability.reachable_from.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_code.push(target)
            } else if let Some(ref mut optimize_queue) = optimize_queue {
                optimize_queue.push(target);
            }
        }
        ExtRef::Address(target) => {
            log::trace!("{target:?}'s address is not taken in {current:?} anymore");
            let reachability = reachability_graph.for_code.get_mut(&target).unwrap();
            reachability.address_taken_in.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_code.push(target)
            } else if let Some(ref mut optimize_queue) = optimize_queue {
                optimize_queue.push(target);
            }
        }
        ExtRef::DataAddress(target) => {
            log::trace!("{target:?}'s address is not taken in {current:?} anymore");
            let reachability = reachability_graph.for_data.get_mut(&target).unwrap();
            reachability.address_taken_in.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_data.push(target);
            }
        }
    });

    reachability_graph.for_code.remove(&current);
}
3614
/// Detaches the (already unreachable) data section `current` from the
/// reachability graph.
///
/// Mirrors [`remove_unreachable_code_impl`]: code/data referenced from this
/// section loses its back-reference, anything that becomes unreachable is
/// pushed onto `queue_code` / `queue_data`, and still-reachable code is pushed
/// onto `optimize_queue` (if given) for re-optimization.
fn remove_unreachable_data_impl(
    reachability_graph: &mut ReachabilityGraph,
    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
    queue_code: &mut VecSet<BlockTarget>,
    queue_data: &mut VecSet<SectionIndex>,
    current: SectionIndex,
) {
    // The caller must have already established that this section is dead.
    assert!(reachability_graph.for_data.get(&current).unwrap().is_unreachable());
    log::trace!("Removing {current:?} from the graph...");

    let code_refs = reachability_graph.code_references_in_data_section.remove(&current);
    let data_refs = reachability_graph.data_references_in_data_section.remove(&current);

    if let Some(list) = code_refs {
        for target in list {
            log::trace!("{target:?} is not reachable from {current:?} anymore");
            let reachability = reachability_graph.for_code.get_mut(&target).unwrap();
            reachability.referenced_by_data.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_code.push(target)
            } else if let Some(ref mut optimize_queue) = optimize_queue {
                optimize_queue.push(target);
            }
        }
    }

    if let Some(list) = data_refs {
        for target in list {
            log::trace!("{target:?} is not reachable from {current:?} anymore");
            let reachability = reachability_graph.for_data.get_mut(&target).unwrap();
            reachability.referenced_by_data.remove(&current);
            if reachability.is_unreachable() {
                log::trace!("{target:?} is now unreachable!");
                queue_data.push(target)
            }
        }
    }

    reachability_graph.for_data.remove(&current);
}
3656
3657fn remove_code_if_globally_unreachable(
3658    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3659    reachability_graph: &mut ReachabilityGraph,
3660    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3661    block_target: BlockTarget,
3662) {
3663    let Some(reachability) = reachability_graph.for_code.get(&block_target) else {
3664        return;
3665    };
3666    if !reachability.is_unreachable() {
3667        return;
3668    }
3669
3670    // The inner block is now globally unreachable.
3671    let mut queue_code = VecSet::new();
3672    let mut queue_data = VecSet::new();
3673    remove_unreachable_code_impl(
3674        all_blocks,
3675        reachability_graph,
3676        optimize_queue.as_deref_mut(),
3677        &mut queue_code,
3678        &mut queue_data,
3679        block_target,
3680    );
3681
3682    // If there are other dependencies which are now unreachable then remove them too.
3683    while !queue_code.is_empty() || !queue_data.is_empty() {
3684        while let Some(next) = queue_code.pop_unique() {
3685            remove_unreachable_code_impl(
3686                all_blocks,
3687                reachability_graph,
3688                optimize_queue.as_deref_mut(),
3689                &mut queue_code,
3690                &mut queue_data,
3691                next,
3692            );
3693        }
3694
3695        while let Some(next) = queue_data.pop_unique() {
3696            remove_unreachable_data_impl(
3697                reachability_graph,
3698                optimize_queue.as_deref_mut(),
3699                &mut queue_code,
3700                &mut queue_data,
3701                next,
3702            );
3703        }
3704    }
3705}
3706
3707fn remove_if_data_is_globally_unreachable(
3708    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3709    reachability_graph: &mut ReachabilityGraph,
3710    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3711    data_target: SectionIndex,
3712) {
3713    let Some(reachability) = reachability_graph.for_data.get(&data_target) else {
3714        return;
3715    };
3716    if !reachability.is_unreachable() {
3717        return;
3718    }
3719
3720    let mut queue_code = VecSet::new();
3721    let mut queue_data = VecSet::new();
3722    remove_unreachable_data_impl(
3723        reachability_graph,
3724        optimize_queue.as_deref_mut(),
3725        &mut queue_code,
3726        &mut queue_data,
3727        data_target,
3728    );
3729
3730    // If there are other dependencies which are now unreachable then remove them too.
3731    while !queue_code.is_empty() || !queue_data.is_empty() {
3732        while let Some(next) = queue_code.pop_unique() {
3733            remove_unreachable_code_impl(
3734                all_blocks,
3735                reachability_graph,
3736                optimize_queue.as_deref_mut(),
3737                &mut queue_code,
3738                &mut queue_data,
3739                next,
3740            );
3741        }
3742
3743        while let Some(next) = queue_data.pop_unique() {
3744            remove_unreachable_data_impl(
3745                reachability_graph,
3746                optimize_queue.as_deref_mut(),
3747                &mut queue_code,
3748                &mut queue_data,
3749                next,
3750            );
3751        }
3752    }
3753}
3754
3755fn add_to_optimize_queue(
3756    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3757    reachability_graph: &ReachabilityGraph,
3758    optimize_queue: &mut VecSet<BlockTarget>,
3759    block_target: BlockTarget,
3760) {
3761    let Some(reachability) = reachability_graph.for_code.get(&block_target) else {
3762        return;
3763    };
3764    if reachability.is_unreachable() {
3765        return;
3766    }
3767
3768    optimize_queue.push(block_target);
3769
3770    for &previous in &reachability.reachable_from {
3771        optimize_queue.push(previous);
3772    }
3773
3774    for &previous in &reachability.address_taken_in {
3775        optimize_queue.push(previous);
3776    }
3777
3778    for &next in all_blocks[block_target.index()].next.instruction.targets().into_iter().flatten() {
3779        optimize_queue.push(next);
3780    }
3781
3782    each_reference(&all_blocks[block_target.index()], |ext| match ext {
3783        ExtRef::Jump(target) => optimize_queue.push(target),
3784        ExtRef::Address(target) => optimize_queue.push(target),
3785        ExtRef::DataAddress(..) => {}
3786    });
3787}
3788
3789fn perform_nop_elimination(all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>], current: BlockTarget) {
3790    all_blocks[current.index()].ops.retain(|(_, instruction)| !instruction.is_nop());
3791}
3792
3793fn perform_meta_instruction_lowering(is_rv64: bool, all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>], current: BlockTarget) {
3794    let block = &mut all_blocks[current.index()];
3795    if !block
3796        .ops
3797        .iter()
3798        .any(|(_, op)| matches!(op, BasicInst::Prologue { .. } | BasicInst::Epilogue { .. }))
3799    {
3800        return;
3801    }
3802
3803    let (add_kind, store_kind, load_kind) = if is_rv64 {
3804        (AnyAnyKind::Add64, StoreKind::U64, LoadKind::U64)
3805    } else {
3806        (AnyAnyKind::Add32, StoreKind::U32, LoadKind::U64)
3807    };
3808
3809    let mut buffer = Vec::with_capacity(block.ops.len());
3810    for (source, op) in block.ops.drain(..) {
3811        match op {
3812            BasicInst::Prologue { stack_space, ref regs } => {
3813                buffer.push((
3814                    source.clone(),
3815                    BasicInst::AnyAny {
3816                        kind: add_kind,
3817                        dst: Reg::SP,
3818                        src1: Reg::SP.into(),
3819                        src2: (cast(stack_space).to_signed() * -1).into(),
3820                    },
3821                ));
3822                for &(offset, src) in regs {
3823                    buffer.push((
3824                        source.clone(),
3825                        BasicInst::StoreIndirect {
3826                            kind: store_kind,
3827                            src: src.into(),
3828                            base: Reg::SP,
3829                            offset: cast(offset).to_signed(),
3830                        },
3831                    ));
3832                }
3833            }
3834            BasicInst::Epilogue { stack_space, ref regs } => {
3835                for &(offset, dst) in regs {
3836                    buffer.push((
3837                        source.clone(),
3838                        BasicInst::LoadIndirect {
3839                            kind: load_kind,
3840                            dst,
3841                            base: Reg::SP,
3842                            offset: cast(offset).to_signed(),
3843                        },
3844                    ));
3845                }
3846
3847                buffer.push((
3848                    source.clone(),
3849                    BasicInst::AnyAny {
3850                        kind: add_kind,
3851                        dst: Reg::SP,
3852                        src1: Reg::SP.into(),
3853                        src2: cast(stack_space).to_signed().into(),
3854                    },
3855                ));
3856            }
3857            _ => {
3858                buffer.push((source, op));
3859            }
3860        };
3861    }
3862
3863    block.ops = buffer;
3864}
3865
3866#[deny(clippy::as_conversions)]
3867fn perform_inlining(
3868    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
3869    reachability_graph: &mut ReachabilityGraph,
3870    exports: &mut [Export],
3871    optimize_queue: Option<&mut VecSet<BlockTarget>>,
3872    inline_history: &mut HashSet<(BlockTarget, BlockTarget)>,
3873    inline_threshold: usize,
3874    current: BlockTarget,
3875) -> bool {
3876    fn is_infinite_loop(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>], current: BlockTarget) -> bool {
3877        all_blocks[current.index()].next.instruction == ControlInst::Jump { target: current }
3878    }
3879
3880    fn inline(
3881        all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
3882        reachability_graph: &mut ReachabilityGraph,
3883        mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
3884        outer: BlockTarget,
3885        inner: BlockTarget,
3886    ) {
3887        log::trace!("Inlining {inner:?} into {outer:?}...");
3888        log::trace!("  {outer:?} will now end with: {:?}", all_blocks[inner.index()].next.instruction);
3889
3890        if let Some(ref mut optimize_queue) = optimize_queue {
3891            add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, outer);
3892            add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, inner);
3893        }
3894
3895        // Inlining into ourselves doesn't make sense.
3896        assert_ne!(outer, inner);
3897
3898        // No infinite loops.
3899        assert!(!is_infinite_loop(all_blocks, inner));
3900
3901        // Make sure this block actually goes to the block we're inlining.
3902        assert_eq!(all_blocks[outer.index()].next.instruction, ControlInst::Jump { target: inner });
3903
3904        // The inner block is not reachable from here anymore.
3905        // NOTE: This needs to be done *before* adding the references below,
3906        //       as the inner block might be an infinite loop.
3907        reachability_graph.for_code.get_mut(&inner).unwrap().reachable_from.remove(&outer);
3908
3909        // Everything which the inner block accesses will be reachable from here, so update reachability.
3910        each_reference(&all_blocks[inner.index()], |ext| match ext {
3911            ExtRef::Jump(target) => {
3912                reachability_graph.for_code.entry(target).or_default().reachable_from.insert(outer);
3913            }
3914            ExtRef::Address(target) => {
3915                reachability_graph
3916                    .for_code
3917                    .entry(target)
3918                    .or_default()
3919                    .address_taken_in
3920                    .insert(outer);
3921            }
3922            ExtRef::DataAddress(target) => {
3923                reachability_graph
3924                    .for_data
3925                    .entry(target)
3926                    .or_default()
3927                    .address_taken_in
3928                    .insert(outer);
3929            }
3930        });
3931
3932        // Remove it from the graph if it's globally unreachable now.
3933        remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue, inner);
3934
3935        let outer_source = all_blocks[outer.index()].next.source.clone();
3936        let inner_source = all_blocks[inner.index()].next.source.clone();
3937        let inner_code: Vec<_> = all_blocks[inner.index()]
3938            .ops
3939            .iter()
3940            .map(|(inner_source, op)| (outer_source.overlay_on_top_of(inner_source), op.clone()))
3941            .collect();
3942
3943        all_blocks[outer.index()].ops.extend(inner_code);
3944        all_blocks[outer.index()].next.source.overlay_on_top_of_inplace(&inner_source);
3945        all_blocks[outer.index()].next.instruction = all_blocks[inner.index()].next.instruction;
3946    }
3947
3948    fn should_inline(
3949        all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
3950        reachability_graph: &ReachabilityGraph,
3951        current: BlockTarget,
3952        target: BlockTarget,
3953        inline_threshold: usize,
3954    ) -> bool {
3955        // Don't inline if it's an infinite loop.
3956        if target == current || is_infinite_loop(all_blocks, target) {
3957            return false;
3958        }
3959
3960        if let Some(fallthrough_target) = all_blocks[target.index()].next.instruction.fallthrough_target() {
3961            if fallthrough_target.index() == target.index() + 1 {
3962                // Do not inline if we'd need to inject a new fallthrough basic block.
3963                return false;
3964            }
3965        }
3966
3967        // Inline if the target block is small enough.
3968        let mut inline_cost = all_blocks[target.index()].ops.len();
3969        if let Some((_, BasicInst::Prologue { regs, .. })) = all_blocks[target.index()].ops.first() {
3970            inline_cost += regs.len();
3971        }
3972        if let Some((_, BasicInst::Epilogue { regs, .. })) = all_blocks[target.index()].ops.last() {
3973            inline_cost += regs.len();
3974        }
3975
3976        if inline_cost <= inline_threshold {
3977            return true;
3978        }
3979
3980        // Inline if the target block is only reachable from here.
3981        if let Some(reachability) = reachability_graph.for_code.get(&target) {
3982            if reachability.is_only_reachable_from(current) {
3983                return true;
3984            }
3985        }
3986
3987        false
3988    }
3989
    // NOTE(review): body of the enclosing optimization pass — `current`,
    // `inline_history`, `exports`, `optimize_queue` and `inline_threshold`
    // are bound before this excerpt. Returns `true` if anything was changed.
    if !reachability_graph.is_code_reachable(current) {
        return false;
    }

    let block = &all_blocks[current.index()];
    match block.next.instruction {
        ControlInst::Jump { target } => {
            // If this block is empty and only jumps elsewhere, any exports
            // pointing at it can be retargeted directly at the destination.
            // `inline_history` guards against reprocessing the same edge.
            if all_blocks[current.index()].ops.is_empty() && inline_history.insert((current, target)) {
                let reachability = reachability_graph.for_code.get_mut(&current).unwrap();
                if !reachability.exports.is_empty() {
                    let export_indexes = core::mem::take(&mut reachability.exports);
                    for &export_index in &export_indexes {
                        exports[export_index].location = all_blocks[target.index()].source.begin();
                    }
                    // Move the export set to the jump target; the now-unexported
                    // block may have become unreachable.
                    reachability_graph.for_code.get_mut(&target).unwrap().exports.extend(export_indexes);
                    remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue, current);
                    return true;
                }
            }

            // Otherwise try to inline the jump target into this block.
            if should_inline(all_blocks, reachability_graph, current, target, inline_threshold) && inline_history.insert((current, target))
            {
                inline(all_blocks, reachability_graph, optimize_queue, current, target);
                return true;
            }
        }
        ControlInst::Branch {
            kind,
            src1,
            src2,
            target_true,
            target_false,
        } => {
            if let ControlInst::Jump { target } = all_blocks[target_true.index()].next.instruction {
                if target != target_true && all_blocks[target_true.index()].ops.is_empty() {
                    // We're branching to another block which immediately jumps somewhere else.
                    // So skip the middle-man and just jump where we want to go directly.
                    assert!(reachability_graph
                        .for_code
                        .get_mut(&target_true)
                        .unwrap()
                        .reachable_from
                        .remove(&current));

                    reachability_graph.for_code.get_mut(&target).unwrap().reachable_from.insert(current);
                    all_blocks[current.index()].next.instruction = ControlInst::Branch {
                        kind,
                        src1,
                        src2,
                        target_true: target,
                        target_false,
                    };

                    // The skipped middle block may now be dead.
                    remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue, target_true);
                    return true;
                }
            }
        }
        // Calls are lowered away before this pass runs, so this is a bug.
        ControlInst::Call { .. } => unreachable!(),
        _ => {}
    }

    false
4053}
4054
4055fn gather_references(block: &BasicBlock<AnyTarget, BlockTarget>) -> BTreeSet<ExtRef> {
4056    let mut references = BTreeSet::new();
4057    each_reference(block, |ext| {
4058        references.insert(ext);
4059    });
4060    references
4061}
4062
4063fn update_references(
4064    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
4065    reachability_graph: &mut ReachabilityGraph,
4066    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
4067    block_target: BlockTarget,
4068    mut old_references: BTreeSet<ExtRef>,
4069) {
4070    let mut new_references = gather_references(&all_blocks[block_target.index()]);
4071    new_references.retain(|ext| !old_references.remove(ext));
4072
4073    for ext in &old_references {
4074        match ext {
4075            ExtRef::Jump(target) => {
4076                log::trace!("{target:?} is not reachable from {block_target:?} anymore");
4077                reachability_graph
4078                    .for_code
4079                    .get_mut(target)
4080                    .unwrap()
4081                    .reachable_from
4082                    .remove(&block_target);
4083            }
4084            ExtRef::Address(target) => {
4085                log::trace!("{target:?}'s address is not taken in {block_target:?} anymore");
4086                reachability_graph
4087                    .for_code
4088                    .get_mut(target)
4089                    .unwrap()
4090                    .address_taken_in
4091                    .remove(&block_target);
4092            }
4093            ExtRef::DataAddress(target) => {
4094                log::trace!("{target:?}'s address is not taken in {block_target:?} anymore");
4095                reachability_graph
4096                    .for_data
4097                    .get_mut(target)
4098                    .unwrap()
4099                    .address_taken_in
4100                    .remove(&block_target);
4101            }
4102        }
4103    }
4104
4105    for ext in &new_references {
4106        match ext {
4107            ExtRef::Jump(target) => {
4108                log::trace!("{target:?} is reachable from {block_target:?}");
4109                reachability_graph
4110                    .for_code
4111                    .get_mut(target)
4112                    .unwrap()
4113                    .reachable_from
4114                    .insert(block_target);
4115            }
4116            ExtRef::Address(target) => {
4117                log::trace!("{target:?}'s address is taken in {block_target:?}");
4118                reachability_graph
4119                    .for_code
4120                    .get_mut(target)
4121                    .unwrap()
4122                    .address_taken_in
4123                    .insert(block_target);
4124            }
4125            ExtRef::DataAddress(target) => {
4126                log::trace!("{target:?}'s address is taken in {block_target:?}");
4127                reachability_graph
4128                    .for_data
4129                    .get_mut(target)
4130                    .unwrap()
4131                    .address_taken_in
4132                    .insert(block_target);
4133            }
4134        }
4135    }
4136
4137    for ext in old_references.into_iter().chain(new_references.into_iter()) {
4138        match ext {
4139            ExtRef::Jump(target) => {
4140                remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), target);
4141            }
4142            ExtRef::Address(target) => {
4143                remove_code_if_globally_unreachable(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), target);
4144            }
4145            ExtRef::DataAddress(target) => {
4146                remove_if_data_is_globally_unreachable(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), target);
4147            }
4148        }
4149    }
4150}
4151
/// Runs one round of backwards-liveness dead-code elimination on `block_target`,
/// removing instructions whose results are never used and which have no side
/// effects. Returns `true` if the block was modified.
///
/// Liveness is seeded from the successor blocks' `registers_needed` (cached in
/// `info_for_block`); if this block's own `registers_needed` changes, its
/// jump-predecessors are re-queued so the update propagates backwards.
#[deny(clippy::as_conversions)]
fn perform_dead_code_elimination(
    config: &Config,
    imports: &[Import],
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    info_for_block: &mut [BlockInfo],
    reachability_graph: &mut ReachabilityGraph,
    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
    block_target: BlockTarget,
) -> bool {
    // Scans the block's instructions backwards, tracking which registers are
    // still needed, and strips dead ones. Returns the registers live at the
    // block's entry.
    #[allow(clippy::too_many_arguments)]
    fn perform_dead_code_elimination_on_block(
        config: &Config,
        imports: &[Import],
        all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
        reachability_graph: &mut ReachabilityGraph,
        mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
        modified: &mut bool,
        mut registers_needed: RegMask,
        block_target: BlockTarget,
    ) -> RegMask {
        // Start from the terminator: its destinations are dead past it,
        // its sources are live before it.
        let next_instruction = &all_blocks[block_target.index()].next.instruction;
        registers_needed.remove(next_instruction.dst_mask());
        registers_needed.insert(next_instruction.src_mask());

        let mut dead_code = Vec::new();
        for (nth_instruction, (_, op)) in all_blocks[block_target.index()].ops.iter_mut().enumerate().rev() {
            let dst_mask = op.dst_mask(imports);
            // Epilogues only need to restore registers which are still live.
            if let BasicInst::Epilogue { ref mut regs, .. } = op {
                regs.retain(|&(_, reg)| (RegMask::from(reg) & registers_needed) != RegMask::empty());
            }

            if !op.has_side_effects(config) && (dst_mask & registers_needed) == RegMask::empty() {
                // This instruction has no side effects and its result is not used; it's dead.
                dead_code.push(nth_instruction);
                continue;
            }

            // If the register was overwritten it means it wasn't needed later.
            registers_needed.remove(dst_mask);
            // ...unless it was used as a source.
            registers_needed.insert(op.src_mask(imports));
        }

        if dead_code.is_empty() {
            return registers_needed;
        }

        *modified = true;
        if let Some(ref mut optimize_queue) = optimize_queue {
            add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, block_target);
        }

        // Snapshot references before mutating so the reachability graph can
        // be fixed up afterwards.
        let references = gather_references(&all_blocks[block_target.index()]);
        for nth_instruction in dead_code {
            log::trace!(
                "Removing dead instruction in {}: {:?}",
                all_blocks[block_target.index()].ops[nth_instruction].0,
                all_blocks[block_target.index()].ops[nth_instruction].1
            );

            // Replace it with a NOP.
            all_blocks[block_target.index()].ops[nth_instruction].1 = BasicInst::Nop;
        }

        // Compact the NOPs away in one pass.
        all_blocks[block_target.index()]
            .ops
            .retain(|(_, instruction)| !instruction.is_nop());

        update_references(all_blocks, reachability_graph, optimize_queue, block_target, references);
        registers_needed
    }

    if !reachability_graph.is_code_reachable(block_target) {
        return false;
    }

    // Collect the blocks which unconditionally jump here, so they can be
    // re-queued if our liveness changes.
    let mut previous_blocks = Vec::new();
    for &previous_block in &reachability_graph.for_code.get(&block_target).unwrap().reachable_from {
        if previous_block == block_target {
            continue;
        }

        let ControlInst::Jump { target } = all_blocks[previous_block.index()].next.instruction else {
            continue;
        };
        if target == block_target {
            previous_blocks.push(previous_block);
        }
    }

    let registers_needed_for_next_block = match all_blocks[block_target.index()].next.instruction {
        // If it's going to trap then it's not going to need any of the register values.
        ControlInst::Unimplemented => RegMask::empty(),
        // If it's a jump then we'll need whatever registers the jump target needs.
        ControlInst::Jump { target } => info_for_block[target.index()].registers_needed,
        ControlInst::Branch {
            target_true, target_false, ..
        } => info_for_block[target_true.index()].registers_needed | info_for_block[target_false.index()].registers_needed,
        // ...otherwise assume it'll need all of them.
        ControlInst::Call { .. } => unreachable!(),
        ControlInst::CallIndirect { .. } | ControlInst::JumpIndirect { .. } => RegMask::all(),
    };

    let mut modified = false;
    let registers_needed_for_this_block = perform_dead_code_elimination_on_block(
        config,
        imports,
        all_blocks,
        reachability_graph,
        optimize_queue.as_deref_mut(),
        &mut modified,
        registers_needed_for_next_block,
        block_target,
    );

    // Propagate a liveness change to jump-predecessors.
    if info_for_block[block_target.index()].registers_needed != registers_needed_for_this_block {
        info_for_block[block_target.index()].registers_needed = registers_needed_for_this_block;
        if let Some(ref mut optimize_queue) = optimize_queue {
            for previous_block in previous_blocks {
                add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, previous_block);
            }
        }
    }

    modified
}
4279
/// Binary ALU operations; presumably "any-any" means both operands may be
/// either a register or an immediate — TODO confirm against the instruction
/// lowering. The `*AndSignExtend` variants perform the 32-bit operation and
/// sign-extend the result to 64 bits (see `OperationKind::apply_const`).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum AnyAnyKind {
    Add32,
    Add32AndSignExtend,
    Add64,
    Sub32,
    Sub32AndSignExtend,
    Sub64,
    And32,
    And64,
    Or32,
    Or64,
    Xor32,
    Xor64,
    SetLessThanUnsigned32,
    SetLessThanUnsigned64,
    SetLessThanSigned32,
    SetLessThanSigned64,
    ShiftLogicalLeft32,
    ShiftLogicalLeft32AndSignExtend,
    ShiftLogicalLeft64,
    ShiftLogicalRight32,
    ShiftLogicalRight32AndSignExtend,
    ShiftLogicalRight64,
    ShiftArithmeticRight32,
    ShiftArithmeticRight32AndSignExtend,
    ShiftArithmeticRight64,
    Mul32,
    Mul32AndSignExtend,
    Mul64,
    RotateRight32,
    RotateRight32AndSignExtend,
    RotateRight64,
}
4314
4315impl AnyAnyKind {
4316    fn add_for_bitness(bitness: Bitness) -> Self {
4317        match bitness {
4318            Bitness::B32 => AnyAnyKind::Add32,
4319            Bitness::B64 => AnyAnyKind::Add64,
4320        }
4321    }
4322}
4323
/// Unary register operations (single source register), covering bit-counting,
/// byte reversal and narrow-width extensions.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum RegKind {
    CountLeadingZeroBits32,
    CountLeadingZeroBits64,
    CountSetBits32,
    CountSetBits64,
    CountTrailingZeroBits32,
    CountTrailingZeroBits64,
    ReverseByte,
    SignExtend8,
    SignExtend16,
    ZeroExtend16,
}
4337
/// Binary operations taking two register operands: upper-half multiplies,
/// division/remainder, and (below the gap) Zbb-style bit-manipulation ops.
/// The `*AndSignExtend` variants do the 32-bit operation and sign-extend
/// the result to 64 bits.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum RegRegKind {
    MulUpperSignedSigned32,
    MulUpperSignedSigned64,
    MulUpperUnsignedUnsigned32,
    MulUpperUnsignedUnsigned64,
    MulUpperSignedUnsigned32,
    MulUpperSignedUnsigned64,
    Div32,
    Div32AndSignExtend,
    Div64,
    DivUnsigned32,
    DivUnsigned32AndSignExtend,
    DivUnsigned64,
    Rem32,
    Rem32AndSignExtend,
    Rem64,
    RemUnsigned32,
    RemUnsigned32AndSignExtend,
    RemUnsigned64,

    // Zbb-style bit-manipulation operations.
    AndInverted,
    OrInverted,
    Xnor,
    Maximum,
    MaximumUnsigned,
    Minimum,
    MinimumUnsigned,
    RotateLeft32,
    RotateLeft32AndSignExtend,
    RotateLeft64,
}
4370
/// The union of all binary operations the optimizer can reason about for
/// constant folding: everything in `AnyAnyKind` and `RegRegKind` (see the
/// `From` impls below), plus the comparison operations derived from
/// `BranchKind`. Evaluated on constants by `apply_const`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum OperationKind {
    Add32,
    Add32AndSignExtend,
    Add64,
    Sub32,
    Sub32AndSignExtend,
    Sub64,
    And32,
    And64,
    Or32,
    Or64,
    Xor32,
    Xor64,
    SetLessThanUnsigned32,
    SetLessThanUnsigned64,
    SetLessThanSigned32,
    SetLessThanSigned64,
    ShiftLogicalLeft32,
    ShiftLogicalLeft32AndSignExtend,
    ShiftLogicalLeft64,
    ShiftLogicalRight32,
    ShiftLogicalRight32AndSignExtend,
    ShiftLogicalRight64,
    ShiftArithmeticRight32,
    ShiftArithmeticRight32AndSignExtend,
    ShiftArithmeticRight64,

    Mul32,
    Mul32AndSignExtend,
    Mul64,
    MulUpperSignedSigned32,
    MulUpperSignedSigned64,
    MulUpperSignedUnsigned32,
    MulUpperSignedUnsigned64,
    MulUpperUnsignedUnsigned32,
    MulUpperUnsignedUnsigned64,
    Div32,
    Div32AndSignExtend,
    Div64,
    DivUnsigned32,
    DivUnsigned32AndSignExtend,
    DivUnsigned64,
    Rem32,
    Rem32AndSignExtend,
    Rem64,
    RemUnsigned32,
    RemUnsigned32AndSignExtend,
    RemUnsigned64,

    // Comparison results, derived from branch conditions (see From<BranchKind>).
    Eq32,
    Eq64,
    NotEq32,
    NotEq64,
    SetGreaterOrEqualSigned32,
    SetGreaterOrEqualSigned64,
    SetGreaterOrEqualUnsigned32,
    SetGreaterOrEqualUnsigned64,

    // Zbb-style bit-manipulation operations.
    AndInverted,
    OrInverted,
    Xnor,
    Maximum,
    MaximumUnsigned,
    Minimum,
    MinimumUnsigned,
    RotateLeft32,
    RotateLeft32AndSignExtend,
    RotateLeft64,
    RotateRight32,
    RotateRight32AndSignExtend,
    RotateRight64,
}
4444
// Mechanical 1:1 embedding of `AnyAnyKind` into `OperationKind`; exhaustive
// so that adding a variant to either enum is a compile error here.
impl From<AnyAnyKind> for OperationKind {
    fn from(kind: AnyAnyKind) -> Self {
        match kind {
            AnyAnyKind::Add32 => Self::Add32,
            AnyAnyKind::Add32AndSignExtend => Self::Add32AndSignExtend,
            AnyAnyKind::Add64 => Self::Add64,
            AnyAnyKind::Sub32 => Self::Sub32,
            AnyAnyKind::Sub32AndSignExtend => Self::Sub32AndSignExtend,
            AnyAnyKind::Sub64 => Self::Sub64,
            AnyAnyKind::And32 => Self::And32,
            AnyAnyKind::And64 => Self::And64,
            AnyAnyKind::Or32 => Self::Or32,
            AnyAnyKind::Or64 => Self::Or64,
            AnyAnyKind::Xor32 => Self::Xor32,
            AnyAnyKind::Xor64 => Self::Xor64,
            AnyAnyKind::SetLessThanUnsigned32 => Self::SetLessThanUnsigned32,
            AnyAnyKind::SetLessThanUnsigned64 => Self::SetLessThanUnsigned64,
            AnyAnyKind::SetLessThanSigned32 => Self::SetLessThanSigned32,
            AnyAnyKind::SetLessThanSigned64 => Self::SetLessThanSigned64,
            AnyAnyKind::ShiftLogicalLeft32 => Self::ShiftLogicalLeft32,
            AnyAnyKind::ShiftLogicalLeft32AndSignExtend => Self::ShiftLogicalLeft32AndSignExtend,
            AnyAnyKind::ShiftLogicalLeft64 => Self::ShiftLogicalLeft64,
            AnyAnyKind::ShiftLogicalRight32 => Self::ShiftLogicalRight32,
            AnyAnyKind::ShiftLogicalRight32AndSignExtend => Self::ShiftLogicalRight32AndSignExtend,
            AnyAnyKind::ShiftLogicalRight64 => Self::ShiftLogicalRight64,
            AnyAnyKind::ShiftArithmeticRight32 => Self::ShiftArithmeticRight32,
            AnyAnyKind::ShiftArithmeticRight32AndSignExtend => Self::ShiftArithmeticRight32AndSignExtend,
            AnyAnyKind::ShiftArithmeticRight64 => Self::ShiftArithmeticRight64,
            AnyAnyKind::Mul32 => Self::Mul32,
            AnyAnyKind::Mul32AndSignExtend => Self::Mul32AndSignExtend,
            AnyAnyKind::Mul64 => Self::Mul64,
            AnyAnyKind::RotateRight32 => Self::RotateRight32,
            AnyAnyKind::RotateRight32AndSignExtend => Self::RotateRight32AndSignExtend,
            AnyAnyKind::RotateRight64 => Self::RotateRight64,
        }
    }
}
4482
// Mechanical 1:1 embedding of `RegRegKind` into `OperationKind`.
impl From<RegRegKind> for OperationKind {
    fn from(kind: RegRegKind) -> Self {
        match kind {
            RegRegKind::MulUpperSignedSigned32 => Self::MulUpperSignedSigned32,
            RegRegKind::MulUpperSignedSigned64 => Self::MulUpperSignedSigned64,
            RegRegKind::MulUpperUnsignedUnsigned32 => Self::MulUpperUnsignedUnsigned32,
            RegRegKind::MulUpperUnsignedUnsigned64 => Self::MulUpperUnsignedUnsigned64,
            RegRegKind::MulUpperSignedUnsigned32 => Self::MulUpperSignedUnsigned32,
            RegRegKind::MulUpperSignedUnsigned64 => Self::MulUpperSignedUnsigned64,
            RegRegKind::Div32 => Self::Div32,
            RegRegKind::Div32AndSignExtend => Self::Div32AndSignExtend,
            RegRegKind::Div64 => Self::Div64,
            RegRegKind::DivUnsigned32 => Self::DivUnsigned32,
            RegRegKind::DivUnsigned32AndSignExtend => Self::DivUnsigned32AndSignExtend,
            RegRegKind::DivUnsigned64 => Self::DivUnsigned64,
            RegRegKind::Rem32 => Self::Rem32,
            RegRegKind::Rem32AndSignExtend => Self::Rem32AndSignExtend,
            RegRegKind::Rem64 => Self::Rem64,
            RegRegKind::RemUnsigned32 => Self::RemUnsigned32,
            RegRegKind::RemUnsigned32AndSignExtend => Self::RemUnsigned32AndSignExtend,
            RegRegKind::RemUnsigned64 => Self::RemUnsigned64,
            RegRegKind::AndInverted => Self::AndInverted,
            RegRegKind::OrInverted => Self::OrInverted,
            RegRegKind::Xnor => Self::Xnor,
            RegRegKind::Maximum => Self::Maximum,
            RegRegKind::MaximumUnsigned => Self::MaximumUnsigned,
            RegRegKind::Minimum => Self::Minimum,
            RegRegKind::MinimumUnsigned => Self::MinimumUnsigned,
            RegRegKind::RotateLeft32 => Self::RotateLeft32,
            RegRegKind::RotateLeft32AndSignExtend => Self::RotateLeft32AndSignExtend,
            RegRegKind::RotateLeft64 => Self::RotateLeft64,
        }
    }
}
4517
// Maps each branch condition onto the comparison operation which computes its
// truth value (0 or 1), so branch conditions can be constant-folded too.
impl From<BranchKind> for OperationKind {
    fn from(kind: BranchKind) -> Self {
        match kind {
            BranchKind::Eq32 => Self::Eq32,
            BranchKind::Eq64 => Self::Eq64,
            BranchKind::NotEq32 => Self::NotEq32,
            BranchKind::NotEq64 => Self::NotEq64,
            BranchKind::LessSigned32 => Self::SetLessThanSigned32,
            BranchKind::LessSigned64 => Self::SetLessThanSigned64,
            BranchKind::GreaterOrEqualSigned32 => Self::SetGreaterOrEqualSigned32,
            BranchKind::GreaterOrEqualSigned64 => Self::SetGreaterOrEqualSigned64,
            BranchKind::LessUnsigned32 => Self::SetLessThanUnsigned32,
            BranchKind::LessUnsigned64 => Self::SetLessThanUnsigned64,
            BranchKind::GreaterOrEqualUnsigned32 => Self::SetGreaterOrEqualUnsigned32,
            BranchKind::GreaterOrEqualUnsigned64 => Self::SetGreaterOrEqualUnsigned64,
        }
    }
}
4536
4537impl OperationKind {
    /// Constant-folds this binary operation on two 64-bit operands.
    ///
    /// 32-bit (`op32!`) variants require operands which fit in `i32` and
    /// panic via `expect` otherwise; the caller is expected to uphold this.
    /// `*AndSignExtend` (`op32_on_64!`) variants instead truncate the 64-bit
    /// operands to their low 32 bits first. Both return the 32-bit result
    /// sign-extended to 64 bits.
    #[rustfmt::skip]
    fn apply_const(self, lhs: i64, rhs: i64) -> i64 {
        use polkavm_common::operation::*;
        // 32-bit op: operands must already fit in i32; result is sign-extended.
        macro_rules! op32 {
            (|$lhs:ident, $rhs:ident| $e:expr) => {{
                let $lhs: i32 = lhs.try_into().expect("operand overflow");
                let $rhs: i32 = rhs.try_into().expect("operand overflow");
                let out: i32 = $e;
                cast(out).to_i64_sign_extend()
            }};
        }

        // 32-bit op on 64-bit operands: truncate to the low 32 bits, operate,
        // then sign-extend the 32-bit result back to 64 bits (RV64 *W semantics).
        macro_rules! op32_on_64 {
            (|$lhs:ident, $rhs:ident| $e:expr) => {{
                let $lhs: u64 = cast($lhs).to_unsigned();
                let $lhs: u32 = cast($lhs).truncate_to_u32();
                let $lhs: i32 = cast($lhs).to_signed();
                let $rhs: u64 = cast($rhs).to_unsigned();
                let $rhs: u32 = cast($rhs).truncate_to_u32();
                let $rhs: i32 = cast($rhs).to_signed();
                let out: i32 = $e;
                cast(out).to_i64_sign_extend()
            }};
        }

        match self {
            Self::Add32 => {
                op32!(|lhs, rhs| lhs.wrapping_add(rhs))
            }
            Self::Add32AndSignExtend => {
                op32_on_64!(|lhs, rhs| lhs.wrapping_add(rhs))
            }
            Self::Add64 => {
                lhs.wrapping_add(rhs)
            },
            Self::And32 => {
                op32!(|lhs, rhs| lhs & rhs)
            }
            Self::And64 => {
                lhs & rhs
            },
            // div/rem delegate to polkavm_common::operation, which is presumed
            // to implement the RISC-V divide-by-zero/overflow conventions —
            // TODO confirm in polkavm_common.
            Self::Div32 => {
                op32!(|lhs, rhs| div(lhs, rhs))
            }
            Self::Div32AndSignExtend => {
                op32_on_64!(|lhs, rhs| div(lhs, rhs))
            }
            Self::Div64 => {
                div64(lhs, rhs)
            },
            Self::DivUnsigned32 => {
                op32!(|lhs, rhs| cast(divu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
            }
            Self::DivUnsigned32AndSignExtend => {
                op32_on_64!(|lhs, rhs| cast(divu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
            }
            Self::DivUnsigned64 => {
                cast(divu64(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed()
            },
            Self::Eq32 => {
                op32!(|lhs, rhs| i32::from(lhs == rhs))
            }
            Self::Eq64 => {
                i64::from(lhs == rhs)
            },
            Self::Mul32 => {
                op32!(|lhs, rhs| lhs.wrapping_mul(rhs))
            }
            Self::Mul32AndSignExtend => {
                op32_on_64!(|lhs, rhs| lhs.wrapping_mul(rhs))
            }
            Self::Mul64 => {
                lhs.wrapping_mul(rhs)
            },
            Self::MulUpperSignedSigned32 => {
                op32!(|lhs, rhs| mulh(lhs, rhs))
            },
            Self::MulUpperSignedSigned64 => {
                mulh64(lhs, rhs)
            },
            Self::MulUpperSignedUnsigned32 => {
                op32!(|lhs, rhs| mulhsu(lhs, cast(rhs).to_unsigned()))
            },
            Self::MulUpperSignedUnsigned64 => {
                mulhsu64(lhs, cast(rhs).to_unsigned())
            },
            Self::MulUpperUnsignedUnsigned32 => {
                op32!(|lhs, rhs| cast(mulhu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
            },
            Self::MulUpperUnsignedUnsigned64 => {
                cast(mulhu64(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed()
            },
            Self::NotEq32 => {
                op32!(|lhs, rhs| i32::from(lhs != rhs))
            },
            Self::NotEq64 => {
                i64::from(lhs != rhs)
            },
            Self::Or32 => {
                op32!(|lhs, rhs| lhs | rhs)
            },
            Self::Or64 => {
                lhs | rhs
            },
            Self::Rem32 => {
                op32!(|lhs, rhs| rem(lhs, rhs))
            },
            Self::Rem32AndSignExtend => {
                op32_on_64!(|lhs, rhs| rem(lhs, rhs))
            },
            Self::Rem64 => {
                rem64(lhs, rhs)
            },
            Self::RemUnsigned32 => {
                op32!(|lhs, rhs| cast(remu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
            },
            Self::RemUnsigned32AndSignExtend => {
                op32_on_64!(|lhs, rhs| cast(remu(cast(lhs).to_unsigned(), cast(rhs).to_unsigned())).to_signed())
            }
            Self::RemUnsigned64 => {
                remu64(cast(lhs).to_unsigned(), cast(rhs).to_unsigned()) as i64
            },
            Self::SetGreaterOrEqualSigned32 => {
                op32!(|lhs, rhs| i32::from(lhs >= rhs))
            },
            Self::SetGreaterOrEqualSigned64 => {
                i64::from(lhs >= rhs)
            },
            Self::SetGreaterOrEqualUnsigned32 => {
                op32!(|lhs, rhs| i32::from(cast(lhs).to_unsigned() >= cast(rhs).to_unsigned()))
            },
            Self::SetGreaterOrEqualUnsigned64 => {
                i64::from(cast(lhs).to_unsigned() >= cast(rhs).to_unsigned())
            },
            Self::SetLessThanSigned32 => {
                op32!(|lhs, rhs| i32::from(lhs < rhs))
            },
            Self::SetLessThanSigned64 => {
                i64::from(lhs < rhs)
            },
            Self::SetLessThanUnsigned32 => {
                op32!(|lhs, rhs| i32::from(cast(lhs).to_unsigned() < cast(rhs).to_unsigned()))
            },
            Self::SetLessThanUnsigned64 => {
                i64::from((lhs as u64) < (rhs as u64))
            },
            // Shift amounts use wrapping_shr/shl, i.e. only the low bits of
            // the shift amount are significant (masked modulo the bit width).
            Self::ShiftArithmeticRight32 => {
                op32!(|lhs, rhs| lhs.wrapping_shr(cast(rhs).to_unsigned()))
            },
            Self::ShiftArithmeticRight32AndSignExtend => {
                op32_on_64!(|lhs, rhs| lhs.wrapping_shr(cast(rhs).to_unsigned()))
            },
            Self::ShiftArithmeticRight64 => {
                let rhs = cast(rhs).to_unsigned();
                let rhs = cast(rhs).truncate_to_u32();
                lhs.wrapping_shr(rhs)
            },
            Self::ShiftLogicalLeft32 => {
                op32!(|lhs, rhs| lhs.wrapping_shl(cast(rhs).to_unsigned()))
            },
            Self::ShiftLogicalLeft32AndSignExtend => {
                op32_on_64!(|lhs, rhs| lhs.wrapping_shl(cast(rhs).to_unsigned()))
            },
            Self::ShiftLogicalLeft64 => {
                let rhs = cast(rhs).to_unsigned();
                let rhs = cast(rhs).truncate_to_u32();
                (lhs as u64).wrapping_shl(rhs) as i64
            },
            Self::ShiftLogicalRight32 => {
                op32!(|lhs, rhs| cast(cast(lhs).to_unsigned().wrapping_shr(cast(rhs).to_unsigned())).to_signed())
            },
            Self::ShiftLogicalRight32AndSignExtend => {
                op32_on_64!(|lhs, rhs| cast(cast(lhs).to_unsigned().wrapping_shr(cast(rhs).to_unsigned())).to_signed())
            }
            Self::ShiftLogicalRight64 => {
                (lhs as u64).wrapping_shr(rhs as u32) as i64
            },
            Self::Sub32 => {
                op32!(|lhs, rhs| lhs.wrapping_sub(rhs))
            },
            Self::Sub32AndSignExtend => {
                op32_on_64!(|lhs, rhs| lhs.wrapping_sub(rhs))
            },
            Self::Sub64 => {
                lhs.wrapping_sub(rhs)
            },
            Self::Xor32 => {
                op32!(|lhs, rhs| lhs ^ rhs)
            },
            Self::Xor64 => {
                lhs ^ rhs
            },
            //
            // Zbb instructions
            //
            Self::AndInverted => lhs & (!rhs),
            Self::OrInverted => lhs | (!rhs),
            Self::Xnor => !(lhs ^ rhs),
            Self::Maximum => lhs.max(rhs),
            Self::MaximumUnsigned => (lhs as u64).max(rhs as u64) as i64,
            Self::Minimum => lhs.min(rhs),
            Self::MinimumUnsigned => (lhs as u64).min(rhs as u64) as i64,
            // Rotates: rotate_left/rotate_right take the amount modulo the width.
            Self::RotateLeft32 => {
                op32!(|lhs, rhs| lhs.rotate_left(rhs as u32))
            },
            Self::RotateLeft32AndSignExtend => {
                op32_on_64!(|lhs, rhs| lhs.rotate_left(rhs as u32))
            },
            Self::RotateLeft64 => {
                let rhs = cast(rhs).to_unsigned();
                let rhs = cast(rhs).truncate_to_u32();
                lhs.rotate_left(rhs)
            },
            Self::RotateRight32 => {
                op32!(|lhs, rhs| lhs.rotate_right(rhs as u32))
            },
            Self::RotateRight32AndSignExtend => {
                op32_on_64!(|lhs, rhs| lhs.rotate_right(rhs as u32))
            },
            Self::RotateRight64 => {
                let rhs = cast(rhs).to_unsigned();
                let rhs = cast(rhs).truncate_to_u32();
                lhs.rotate_right(rhs)
            },
        }
    }
4764
4765    fn apply(self, elf: &Elf, lhs: RegValue, rhs: RegValue) -> Option<RegValue> {
4766        use OperationKind as O;
4767        use RegValue::Constant as C;
4768        let native_add = if elf.is_64() { O::Add32 } else { O::Add64 };
4769
4770        #[rustfmt::skip]
4771        let value = match (self, lhs, rhs) {
4772            (_, C(lhs), C(rhs)) => {
4773                C(self.apply_const(lhs, rhs))
4774            },
4775            (O::Add32, RegValue::DataAddress(lhs), C(rhs)) => {
4776                let offset = cast(cast(lhs.offset).to_signed().wrapping_add(rhs)).to_unsigned();
4777                if offset <= elf.section_by_index(lhs.section_index).size() {
4778                    RegValue::DataAddress(SectionTarget {
4779                        section_index: lhs.section_index,
4780                        offset,
4781                    })
4782                } else {
4783                    return None;
4784                }
4785            },
4786            (O::Sub32, RegValue::DataAddress(lhs), C(rhs)) => {
4787                let offset = cast(lhs.offset).to_signed().wrapping_sub(rhs);
4788                if offset >= 0 {
4789                    RegValue::DataAddress(SectionTarget {
4790                        section_index: lhs.section_index,
4791                        offset: cast(offset).to_unsigned(),
4792                    })
4793                } else {
4794                    return None;
4795                }
4796            },
4797
4798            // (x == x) = 1
4799            (O::Eq32,                   lhs, rhs) if lhs == rhs => C(1),
4800            (O::Eq64,                   lhs, rhs) if lhs == rhs => C(1),
4801            // (x != x) = 0
4802            (O::NotEq32,                lhs, rhs) if lhs == rhs => C(0),
4803            (O::NotEq64,                lhs, rhs) if lhs == rhs => C(0),
4804            // x & x = x
4805            (O::And32,                  lhs, rhs) if lhs == rhs => lhs,
4806            (O::And64,                  lhs, rhs) if lhs == rhs => lhs,
4807            // x | x = x
4808            (O::Or32,                   lhs, rhs) if lhs == rhs => lhs,
4809            (O::Or64,                   lhs, rhs) if lhs == rhs => lhs,
4810
4811            // x + 0 = x
4812            (O::Add32,                  lhs, C(0)) => lhs,
4813            (O::Add64,                  lhs, C(0)) => lhs,
4814            // 0 + x = x
4815            (O::Add32,                  C(0), rhs) => rhs,
4816            (O::Add64,                  C(0), rhs) => rhs,
4817            // x | 0 = x
4818            (O::Or32,                   lhs, C(0)) => lhs,
4819            (O::Or64,                   lhs, C(0)) => lhs,
4820            // 0 | x = x
4821            (O::Or32,                   C(0), rhs) => rhs,
4822            (O::Or64,                   C(0), rhs) => rhs,
4823            // x ^ 0 = x
4824            (O::Xor32,                  lhs, C(0)) => lhs,
4825            (O::Xor64,                  lhs, C(0)) => lhs,
4826            // 0 ^ x = x
4827            (O::Xor32,                  C(0), rhs) => rhs,
4828            (O::Xor64,                  C(0), rhs) => rhs,
4829
4830            // x - 0 = x
4831            (O::Sub32,                  lhs, C(0)) => lhs,
4832            (O::Sub64,                  lhs, C(0)) => lhs,
4833            // x << 0 = x
4834            (O::ShiftLogicalLeft32,     lhs, C(0)) => lhs,
4835            (O::ShiftLogicalLeft64,     lhs, C(0)) => lhs,
4836            // x >> 0 = x
4837            (O::ShiftLogicalRight32,    lhs, C(0)) => lhs,
4838            (O::ShiftLogicalRight64,    lhs, C(0)) => lhs,
4839            // x >> 0 = x
4840            (O::ShiftArithmeticRight32, lhs, C(0)) => lhs,
4841            (O::ShiftArithmeticRight64, lhs, C(0)) => lhs,
4842            // x % 0 = x
4843            (O::Rem32,                          lhs, C(0)) => lhs,
4844            (O::Rem64,                          lhs, C(0)) => lhs,
4845            (O::RemUnsigned32,                  lhs, C(0)) => lhs,
4846            (O::RemUnsigned64,                  lhs, C(0)) => lhs,
4847            (O::Rem32AndSignExtend,             lhs, C(0)) => lhs,
4848            (O::RemUnsigned32AndSignExtend,     lhs, C(0)) => lhs,
4849            // 0 % x = 0
4850            (O::Rem32,                          C(0), _) => C(0),
4851            (O::Rem64,                          C(0), _) => C(0),
4852            (O::RemUnsigned32,                  C(0), _) => C(0),
4853            (O::RemUnsigned64,                  C(0), _) => C(0),
4854            (O::Rem32AndSignExtend,             C(0), _) => C(0),
4855            (O::RemUnsigned32AndSignExtend,     C(0), _) => C(0),
4856
4857            // x & 0 = 0
4858            (O::And32,                    _, C(0)) => C(0),
4859            (O::And64,                    _, C(0)) => C(0),
4860            // 0 & x = 0
4861            (O::And32,                    C(0), _) => C(0),
4862            (O::And64,                    C(0), _) => C(0),
4863            // x * 0 = 0
4864            (O::Mul32,                    _, C(0)) => C(0),
4865            (O::Mul64,                    _, C(0)) => C(0),
4866            (O::MulUpperSignedSigned32,   _, C(0)) => C(0),
4867            (O::MulUpperSignedSigned64,   _, C(0)) => C(0),
4868            (O::MulUpperSignedUnsigned32, _, C(0)) => C(0),
4869            (O::MulUpperSignedUnsigned64, _, C(0)) => C(0),
4870            (O::MulUpperUnsignedUnsigned32, _, C(0)) => C(0),
4871            (O::MulUpperUnsignedUnsigned64, _, C(0)) => C(0),
4872            // 0 * x = 0
4873            (O::Mul32,                    C(0), _) => C(0),
4874            (O::Mul64,                    C(0), _) => C(0),
4875            (O::MulUpperSignedSigned32,   C(0), _) => C(0),
4876            (O::MulUpperSignedSigned64,   C(0), _) => C(0),
4877            (O::MulUpperSignedUnsigned32, C(0), _) => C(0),
4878            (O::MulUpperSignedUnsigned64, C(0), _) => C(0),
4879            (O::MulUpperUnsignedUnsigned32, C(0), _) => C(0),
4880            (O::MulUpperUnsignedUnsigned64, C(0), _) => C(0),
4881
4882            // x / 0 = -1
4883            (O::Div32,                          _, C(0)) => C(-1),
4884            (O::Div64,                          _, C(0)) => C(-1),
4885            (O::DivUnsigned32,                  _, C(0)) => C(-1),
4886            (O::DivUnsigned64,                  _, C(0)) => C(-1),
4887            (O::Div32AndSignExtend,             _, C(0)) => C(-1),
4888            (O::DivUnsigned32AndSignExtend,     _, C(0)) => C(-1),
4889
4890            // 0 / x = 0
4891            (O::Div32,                          C(0), _) => C(0),
4892            (O::Div64,                          C(0), _) => C(0),
4893            (O::DivUnsigned32,                  C(0), _) => C(0),
4894            (O::DivUnsigned64,                  C(0), _) => C(0),
4895            (O::Div32AndSignExtend,             C(0), _) => C(0),
4896            (O::DivUnsigned32AndSignExtend,     C(0), _) => C(0),
4897
4898            // (x & ~0) = x
4899            (O::AndInverted,              lhs, C(0)) => lhs,
4900            // (0 & ~x) = 0
4901            (O::AndInverted,              C(0), _) => C(0),
4902
4903            // (x | ~0) = -1
4904            (O::OrInverted,               _, C(0)) => C(-1),
4905
4906            // unsigned_max(0, x) = x
4907            (O::MaximumUnsigned,          C(0), rhs) => rhs,
4908            (O::MaximumUnsigned,          lhs, C(0)) => lhs,
4909
4910            // unsigned min(0, x) = 0
4911            (O::MinimumUnsigned,          C(0), _) => C(0),
4912            (O::MinimumUnsigned,          _, C(0)) => C(0),
4913
4914            // x <<r 0 = x
4915            (O::RotateLeft32,             lhs, C(0)) => lhs,
4916            (O::RotateLeft32,             C(0), _) => C(0),
4917            (O::RotateLeft64,             lhs, C(0)) => lhs,
4918            (O::RotateLeft64,             C(0), _) => C(0),
4919
4920            // x >>r 0 = x
4921            (O::RotateRight32,            lhs, C(0)) => lhs,
4922            (O::RotateRight32,            C(0), _) => C(0),
4923            (O::RotateRight64,            lhs, C(0)) => lhs,
4924            (O::RotateRight64,            C(0), _) => C(0),
4925
4926            // (0 <<r 0) or (0 >>r 0) = 0
4927            (O::RotateLeft32AndSignExtend,  C(0), _) => C(0),
4928            (O::RotateRight32AndSignExtend, C(0), _) => C(0),
4929
4930            (kind, RegValue::Reg { reg, direction, source_block, bits_used, addend }, C(imm)) |
4931            (kind, C(imm), RegValue::Reg { reg, direction, source_block, bits_used, addend })
4932                if kind == native_add => {
4933                    RegValue::Reg { reg, direction, source_block, bits_used, addend: imm.wrapping_add(addend) }
4934                },
4935
4936            _ => return None,
4937        };
4938
4939        Some(value)
4940    }
4941}
4942
/// Whether a symbolic register value refers to the register's state at block
/// entry (`Input`) or at block exit (`Output`).
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
enum Direction {
    Input,
    Output,
}
4948
/// An abstract value tracked for a register during constant propagation.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
enum RegValue {
    /// The (unknown) value `reg` had at the entry/exit of `source_block`,
    /// plus a constant `addend`.
    Reg {
        reg: Reg,
        direction: Direction,
        source_block: BlockTarget,
        // Mask of bits which can possibly be set in this value.
        bits_used: u64,
        addend: i64,
    },
    /// The address of a basic block.
    CodeAddress(BlockTarget),
    /// An address within a data section.
    DataAddress(SectionTarget),
    /// A statically known constant.
    Constant(i64),
    /// An opaque value; `unique` distinguishes unrelated unknowns from each other.
    Unknown {
        unique: u64,
        bits_used: u64,
    },
}
4966
4967impl RegValue {
4968    fn to_instruction(self, dst: Reg, is_rv64: bool) -> Option<BasicInst<AnyTarget>> {
4969        match self {
4970            RegValue::CodeAddress(target) => Some(BasicInst::LoadAddress {
4971                dst,
4972                target: AnyTarget::Code(target),
4973            }),
4974            RegValue::DataAddress(target) => Some(BasicInst::LoadAddress {
4975                dst,
4976                target: AnyTarget::Data(target),
4977            }),
4978            RegValue::Constant(imm) => {
4979                if let Ok(imm) = i32::try_from(imm) {
4980                    Some(BasicInst::LoadImmediate { dst, imm })
4981                } else {
4982                    assert!(is_rv64, "64-bit register value on 32-bit target");
4983                    Some(BasicInst::LoadImmediate64 { dst, imm })
4984                }
4985            }
4986            _ => None,
4987        }
4988    }
4989
4990    fn bits_used(self) -> u64 {
4991        match self {
4992            RegValue::CodeAddress(..) | RegValue::DataAddress(..) => u64::from(u32::MAX),
4993            RegValue::Constant(value) => value as u64,
4994            RegValue::Unknown { bits_used, .. } => bits_used,
4995            RegValue::Reg { bits_used, addend, .. } => {
4996                if addend == 0 {
4997                    bits_used
4998                } else {
4999                    let addend = cast(addend).to_unsigned();
5000                    bits_used | (bits_used << 1) | addend | (addend << 1)
5001                }
5002            }
5003        }
5004    }
5005}
5006
/// Per-basic-block state gathered during register value propagation.
struct BlockInfo {
    // Register values at block entry.
    input_regs: BlockRegs,
    // Register values at block exit.
    output_regs: BlockRegs,
    registers_needed: RegMask,
    // NOTE(review): presumably known values stored on the stack, keyed by
    // their abstract address — confirm against the code which fills this in.
    stack: HashMap<RegValue, RegValue>,
    terminator: Terminator,
}
5014
/// One possible way in which execution can ultimately terminate.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
enum TerminatorKind {
    Unimplemented,
    InfiniteLoop,
    JumpIndirect { block: BlockTarget, base: Reg, offset: i64 },
}
5021
/// The set of possible terminators of a block; the single-element case is kept
/// inline to avoid allocating a set.
#[derive(Clone, Debug)]
enum Terminator {
    One(TerminatorKind),
    Many(BTreeSet<TerminatorKind>),
}
5027
5028impl Terminator {
5029    fn contains_infinite_loop(&self) -> bool {
5030        match self {
5031            Terminator::One(TerminatorKind::InfiniteLoop) => true,
5032            Terminator::Many(set) => set.contains(&TerminatorKind::InfiniteLoop),
5033            Terminator::One(..) => false,
5034        }
5035    }
5036
5037    fn merge(lhs: Self, rhs: Self) -> Self {
5038        match (lhs, rhs) {
5039            (Terminator::One(lhs), Terminator::One(rhs)) => {
5040                if lhs == rhs {
5041                    Terminator::One(lhs)
5042                } else {
5043                    let mut set = BTreeSet::new();
5044                    set.insert(lhs);
5045                    set.insert(rhs);
5046                    Terminator::Many(set)
5047                }
5048            }
5049            (Terminator::One(one), Terminator::Many(mut many)) | (Terminator::Many(mut many), Terminator::One(one)) => {
5050                many.insert(one);
5051                Terminator::Many(many)
5052            }
5053            (Terminator::Many(mut lhs), Terminator::Many(rhs)) => {
5054                lhs.extend(rhs);
5055                Terminator::Many(lhs)
5056            }
5057        }
5058    }
5059}
5060
/// A snapshot of the abstract values of every register.
#[derive(Clone, PartialEq, Eq)]
struct BlockRegs {
    bitness: Bitness,
    regs: [RegValue; Reg::ALL.len()],
}
5066
5067#[deny(clippy::as_conversions)]
5068impl BlockRegs {
5069    fn new_input(bitness: Bitness, source_block: BlockTarget) -> Self {
5070        BlockRegs {
5071            bitness,
5072            regs: Reg::ALL.map(|reg| RegValue::Reg {
5073                reg,
5074                direction: Direction::Input,
5075                source_block,
5076                bits_used: bitness.bits_used_mask(),
5077                addend: 0,
5078            }),
5079        }
5080    }
5081
5082    fn new_output(bitness: Bitness, source_block: BlockTarget) -> Self {
5083        BlockRegs {
5084            bitness,
5085            regs: Reg::ALL.map(|reg| RegValue::Reg {
5086                reg,
5087                direction: Direction::Output,
5088                source_block,
5089                bits_used: bitness.bits_used_mask(),
5090                addend: 0,
5091            }),
5092        }
5093    }
5094
5095    fn get_reg(&self, reg: impl Into<RegImm>) -> RegValue {
5096        match reg.into() {
5097            RegImm::Imm(imm) => RegValue::Constant(cast(imm).to_i64_sign_extend()),
5098            RegImm::Reg(reg) => self.regs[reg.to_usize()],
5099        }
5100    }
5101
    /// Overwrites the tracked value of `reg` with `value`.
    fn set_reg(&mut self, reg: Reg, value: RegValue) {
        self.regs[reg.to_usize()] = value;
    }
5105
    /// Tries to simplify a control instruction using the currently known
    /// register values.
    ///
    /// Returns `None` when no simplification applies; otherwise returns the
    /// replacement control instruction, optionally preceded by a basic
    /// instruction which must be emitted before it.
    fn simplify_control_instruction(
        &self,
        elf: &Elf,
        instruction: ControlInst<BlockTarget>,
    ) -> Option<(Option<BasicInst<AnyTarget>>, ControlInst<BlockTarget>)> {
        match instruction {
            ControlInst::JumpIndirect { base, offset: 0 } => {
                // An indirect jump through a register holding a known code
                // address becomes a direct jump.
                if let RegValue::CodeAddress(target) = self.get_reg(base) {
                    return Some((None, ControlInst::Jump { target }));
                }
            }
            ControlInst::Branch {
                kind,
                src1,
                src2,
                target_true,
                target_false,
            } => {
                // Both sides of the branch go to the same place, so the
                // condition is irrelevant.
                if target_true == target_false {
                    return Some((None, ControlInst::Jump { target: target_true }));
                }

                // If the condition can be evaluated statically, replace the
                // branch with an unconditional jump.
                let src1_value = self.get_reg(src1);
                let src2_value = self.get_reg(src2);
                if let Some(value) = OperationKind::from(kind).apply(elf, src1_value, src2_value) {
                    match value {
                        RegValue::Constant(0) => {
                            return Some((None, ControlInst::Jump { target: target_false }));
                        }
                        RegValue::Constant(1) => {
                            return Some((None, ControlInst::Jump { target: target_true }));
                        }
                        _ => unreachable!("internal error: constant evaluation of branch operands returned a non-boolean value"),
                    }
                }

                // Replace a register operand with an immediate when its value
                // is a known constant which fits.
                if let RegImm::Reg(_) = src1 {
                    if let RegValue::Constant(src1_value) = src1_value {
                        if let Ok(src1_value) = src1_value.try_into() {
                            return Some((
                                None,
                                ControlInst::Branch {
                                    kind,
                                    src1: RegImm::Imm(src1_value),
                                    src2,
                                    target_true,
                                    target_false,
                                },
                            ));
                        }
                    }
                }

                if let RegImm::Reg(_) = src2 {
                    if let RegValue::Constant(src2_value) = src2_value {
                        if let Ok(src2_value) = src2_value.try_into() {
                            return Some((
                                None,
                                ControlInst::Branch {
                                    kind,
                                    src1,
                                    src2: RegImm::Imm(src2_value),
                                    target_true,
                                    target_false,
                                },
                            ));
                        }
                    }
                }
            }
            ControlInst::CallIndirect {
                ra,
                base,
                offset: 0,
                target_return,
            } => {
                // An indirect call through a known code address becomes an
                // explicit return-address load followed by a direct jump.
                if let RegValue::CodeAddress(target) = self.get_reg(base) {
                    let instruction_1 = BasicInst::LoadAddress {
                        dst: ra,
                        target: AnyTarget::Code(target_return),
                    };
                    let instruction_2 = ControlInst::Jump { target };
                    return Some((Some(instruction_1), instruction_2));
                }
            }
            _ => {}
        }

        None
    }
5196
    /// Tries to simplify a basic instruction using the currently known
    /// register values.
    ///
    /// Returns `Some(replacement)` when the instruction can be rewritten into
    /// a simpler equivalent, or `None` when no simplification applies.
    fn simplify_instruction(&self, elf: &Elf, instruction: BasicInst<AnyTarget>) -> Option<BasicInst<AnyTarget>> {
        let is_rv64 = self.bitness == Bitness::B64;

        match instruction {
            BasicInst::RegReg { kind, dst, src1, src2 } => {
                // If the operation constant-folds, load the folded value directly.
                let src1_value = self.get_reg(src1);
                let src2_value = self.get_reg(src2);
                if let Some(value) = OperationKind::from(kind).apply(elf, src1_value, src2_value) {
                    if let Some(new_instruction) = value.to_instruction(dst, is_rv64) {
                        if new_instruction != instruction {
                            return Some(new_instruction);
                        }
                    }
                }
            }
            BasicInst::AnyAny { kind, dst, src1, src2 } => {
                // Same constant-folding as for `RegReg`.
                let src1_value = self.get_reg(src1);
                let src2_value = self.get_reg(src2);
                if let Some(value) = OperationKind::from(kind).apply(elf, src1_value, src2_value) {
                    if let Some(new_instruction) = value.to_instruction(dst, is_rv64) {
                        if new_instruction != instruction {
                            return Some(new_instruction);
                        }
                    }
                }

                // Replace a register operand with an immediate when its value
                // is a known constant which fits.
                if let RegImm::Reg(_) = src1 {
                    if let RegValue::Constant(src1_value) = src1_value {
                        if let Ok(src1_value) = src1_value.try_into() {
                            return Some(BasicInst::AnyAny {
                                kind,
                                dst,
                                src1: RegImm::Imm(src1_value),
                                src2,
                            });
                        }
                    }
                }

                if let RegImm::Reg(_) = src2 {
                    if let RegValue::Constant(src2_value) = src2_value {
                        if let Ok(src2_value) = src2_value.try_into() {
                            return Some(BasicInst::AnyAny {
                                kind,
                                dst,
                                src1,
                                src2: RegImm::Imm(src2_value),
                            });
                        }
                    }
                }

                // Adding zero is just a register move.
                if matches!(kind, AnyAnyKind::Add32 | AnyAnyKind::Add64) {
                    if src1_value == RegValue::Constant(0) {
                        if let RegImm::Reg(src) = src2 {
                            return Some(BasicInst::MoveReg { dst, src });
                        }
                    } else if src2_value == RegValue::Constant(0) {
                        if let RegImm::Reg(src) = src1 {
                            return Some(BasicInst::MoveReg { dst, src });
                        }
                    }
                }

                if matches!(kind, AnyAnyKind::Add32 | AnyAnyKind::Add64)
                    && src1_value != RegValue::Constant(0)
                    && src2_value != RegValue::Constant(0)
                    && (src1_value.bits_used() & src2_value.bits_used()) == 0
                {
                    // Replace an `add` with an `or` if it's safe to do so.
                    //
                    // Curiously LLVM's RISC-V backend doesn't do this even though its AMD64 backend does.
                    return Some(BasicInst::AnyAny {
                        kind: match kind {
                            AnyAnyKind::Add32 => AnyAnyKind::Or32,
                            AnyAnyKind::Add64 => AnyAnyKind::Or64,
                            _ => unreachable!(),
                        },
                        dst,
                        src1,
                        src2,
                    });
                }
            }
            BasicInst::Cmov {
                kind,
                dst,
                src: RegImm::Reg(src),
                cond,
            } => {
                // Replace the conditional-move source with an immediate when
                // its value is a known constant which fits.
                if let RegValue::Constant(src_value) = self.get_reg(src) {
                    if let Ok(src_value) = src_value.try_into() {
                        return Some(BasicInst::Cmov {
                            kind,
                            dst,
                            src: RegImm::Imm(src_value),
                            cond,
                        });
                    }
                }
            }
            BasicInst::LoadIndirect { kind, dst, base, offset } => {
                // A load through a register holding a known data address
                // becomes an absolute load.
                if let RegValue::DataAddress(base) = self.get_reg(base) {
                    return Some(BasicInst::LoadAbsolute {
                        kind,
                        dst,
                        target: base.map_offset_i64(|base| base.wrapping_add(cast(offset).to_i64_sign_extend())),
                    });
                }
            }
            BasicInst::LoadAddressIndirect { dst, target } => {
                return Some(BasicInst::LoadAddress { dst, target });
            }
            BasicInst::StoreIndirect { kind, src, base, offset } => {
                // A store through a register holding a known data address
                // becomes an absolute store.
                if let RegValue::DataAddress(base) = self.get_reg(base) {
                    return Some(BasicInst::StoreAbsolute {
                        kind,
                        src,
                        target: base.map_offset_i64(|base| base.wrapping_add(cast(offset).to_i64_sign_extend())),
                    });
                }

                // Replace the stored register with an immediate when its value
                // is a known constant which fits.
                if let RegImm::Reg(src) = src {
                    if let RegValue::Constant(src_value) = self.get_reg(src) {
                        if let Ok(src_value) = src_value.try_into() {
                            return Some(BasicInst::StoreIndirect {
                                kind,
                                src: RegImm::Imm(src_value),
                                base,
                                offset,
                            });
                        }
                    }
                }
            }
            BasicInst::StoreAbsolute {
                kind,
                src: RegImm::Reg(src),
                target,
            } => {
                if let RegValue::Constant(src_value) = self.get_reg(src) {
                    if let Ok(src_value) = src_value.try_into() {
                        return Some(BasicInst::StoreAbsolute {
                            kind,
                            src: RegImm::Imm(src_value),
                            target,
                        });
                    }
                }
            }
            BasicInst::MoveReg { dst, src } => {
                // A move onto itself does nothing.
                if dst == src {
                    return Some(BasicInst::Nop);
                }
            }
            _ => {}
        }

        None
    }
5357
5358    fn set_reg_unknown(&mut self, dst: Reg, unknown_counter: &mut u64, bits_used: u64) {
5359        let bits_used_masked = bits_used & self.bitness.bits_used_mask();
5360        if bits_used_masked == 0 {
5361            self.set_reg(dst, RegValue::Constant(0));
5362            return;
5363        }
5364
5365        self.set_reg(
5366            dst,
5367            RegValue::Unknown {
5368                unique: *unknown_counter,
5369                bits_used: bits_used_masked,
5370            },
5371        );
5372        *unknown_counter += 1;
5373    }
5374
5375    fn set_reg_from_control_instruction(
5376        &mut self,
5377        elf: &Elf,
5378        imports: &[Import],
5379        stack: &mut HashMap<RegValue, RegValue>,
5380        unknown_counter: &mut u64,
5381        instruction: ControlInst<BlockTarget>,
5382    ) {
5383        #[allow(clippy::single_match)]
5384        match instruction {
5385            ControlInst::CallIndirect { ra, target_return, .. } => {
5386                let implicit_instruction = BasicInst::LoadAddress {
5387                    dst: ra,
5388                    target: AnyTarget::Code(target_return),
5389                };
5390                self.set_reg_from_instruction(elf, imports, stack, unknown_counter, implicit_instruction);
5391            }
5392            _ => {}
5393        }
5394    }
5395
5396    fn set_reg_from_instruction(
5397        &mut self,
5398        elf: &Elf,
5399        imports: &[Import],
5400        stack: &mut HashMap<RegValue, RegValue>,
5401        unknown_counter: &mut u64,
5402        instruction: BasicInst<AnyTarget>,
5403    ) {
5404        match instruction {
5405            BasicInst::LoadImmediate { dst, imm } => {
5406                self.set_reg(dst, RegValue::Constant(cast(imm).to_i64_sign_extend()));
5407            }
5408            BasicInst::LoadImmediate64 { dst, imm } => {
5409                self.set_reg(dst, RegValue::Constant(imm));
5410            }
5411            BasicInst::LoadAddress {
5412                dst,
5413                target: AnyTarget::Code(target),
5414            }
5415            | BasicInst::LoadAddressIndirect {
5416                dst,
5417                target: AnyTarget::Code(target),
5418            } => {
5419                self.set_reg(dst, RegValue::CodeAddress(target));
5420            }
5421            BasicInst::LoadAddress {
5422                dst,
5423                target: AnyTarget::Data(target),
5424            }
5425            | BasicInst::LoadAddressIndirect {
5426                dst,
5427                target: AnyTarget::Data(target),
5428            } => {
5429                self.set_reg(dst, RegValue::DataAddress(target));
5430            }
5431            BasicInst::MoveReg { dst, src } => {
5432                self.set_reg(dst, self.get_reg(src));
5433            }
5434            BasicInst::AnyAny {
5435                kind: AnyAnyKind::Add32 | AnyAnyKind::Add64 | AnyAnyKind::Or32 | AnyAnyKind::Or64,
5436                dst,
5437                src1,
5438                src2: RegImm::Imm(0),
5439            } => {
5440                self.set_reg(dst, self.get_reg(src1));
5441            }
5442            BasicInst::AnyAny {
5443                kind: AnyAnyKind::Add32 | AnyAnyKind::Add64 | AnyAnyKind::Or32 | AnyAnyKind::Or64,
5444                dst,
5445                src1: RegImm::Imm(0),
5446                src2,
5447            } => {
5448                self.set_reg(dst, self.get_reg(src2));
5449            }
5450            BasicInst::AnyAny {
5451                kind,
5452                dst,
5453                src1: RegImm::Reg(reg),
5454                src2: RegImm::Imm(imm),
5455            }
5456            | BasicInst::AnyAny {
5457                kind,
5458                dst,
5459                src1: RegImm::Imm(imm),
5460                src2: RegImm::Reg(reg),
5461            } if kind == AnyAnyKind::add_for_bitness(self.bitness) && matches!(self.get_reg(reg), RegValue::Reg { .. }) => {
5462                let RegValue::Reg {
5463                    reg,
5464                    direction,
5465                    source_block,
5466                    bits_used,
5467                    addend,
5468                } = self.get_reg(reg)
5469                else {
5470                    unreachable!()
5471                };
5472
5473                self.set_reg(
5474                    dst,
5475                    RegValue::Reg {
5476                        reg,
5477                        direction,
5478                        source_block,
5479                        bits_used,
5480                        addend: cast(imm).to_i64_sign_extend().wrapping_add(addend),
5481                    },
5482                )
5483            }
5484            BasicInst::AnyAny {
5485                kind: AnyAnyKind::Add32 | AnyAnyKind::Add64,
5486                dst,
5487                src1,
5488                src2,
5489            } => {
5490                let src1_value = self.get_reg(src1);
5491                let src2_value = self.get_reg(src2);
5492                let bits_used =
5493                    src1_value.bits_used() | src2_value.bits_used() | (src1_value.bits_used() << 1) | (src2_value.bits_used() << 1);
5494
5495                self.set_reg_unknown(dst, unknown_counter, bits_used);
5496            }
5497            BasicInst::AnyAny {
5498                kind: AnyAnyKind::And32 | AnyAnyKind::And64,
5499                dst,
5500                src1,
5501                src2,
5502            } => {
5503                let src1_value = self.get_reg(src1);
5504                let src2_value = self.get_reg(src2);
5505                let bits_used = src1_value.bits_used() & src2_value.bits_used();
5506                self.set_reg_unknown(dst, unknown_counter, bits_used);
5507            }
5508            BasicInst::AnyAny {
5509                kind: AnyAnyKind::Or32 | AnyAnyKind::Or64,
5510                dst,
5511                src1,
5512                src2,
5513            } => {
5514                let src1_value = self.get_reg(src1);
5515                let src2_value = self.get_reg(src2);
5516                let bits_used = src1_value.bits_used() | src2_value.bits_used();
5517                self.set_reg_unknown(dst, unknown_counter, bits_used);
5518            }
5519            BasicInst::AnyAny {
5520                kind: AnyAnyKind::ShiftLogicalRight32,
5521                dst,
5522                src1,
5523                src2: RegImm::Imm(src2),
5524            } => {
5525                let src1_value = self.get_reg(src1);
5526                let bits_used = src1_value.bits_used() >> src2;
5527                self.set_reg_unknown(dst, unknown_counter, bits_used);
5528            }
5529            BasicInst::AnyAny {
5530                kind: AnyAnyKind::ShiftLogicalLeft32,
5531                dst,
5532                src1,
5533                src2: RegImm::Imm(src2),
5534            } => {
5535                let src1_value = self.get_reg(src1);
5536                let bits_used = src1_value.bits_used() << src2;
5537                self.set_reg_unknown(dst, unknown_counter, bits_used);
5538            }
5539            BasicInst::AnyAny {
5540                kind:
5541                    AnyAnyKind::SetLessThanSigned32
5542                    | AnyAnyKind::SetLessThanSigned64
5543                    | AnyAnyKind::SetLessThanUnsigned32
5544                    | AnyAnyKind::SetLessThanUnsigned64,
5545                dst,
5546                ..
5547            } => {
5548                self.set_reg_unknown(dst, unknown_counter, 1);
5549            }
5550            BasicInst::LoadAbsolute {
5551                kind: LoadKind::U8, dst, ..
5552            }
5553            | BasicInst::LoadIndirect {
5554                kind: LoadKind::U8, dst, ..
5555            } => {
5556                self.set_reg_unknown(dst, unknown_counter, u64::from(u8::MAX));
5557            }
5558            BasicInst::LoadAbsolute {
5559                kind: LoadKind::U16, dst, ..
5560            }
5561            | BasicInst::LoadIndirect {
5562                kind: LoadKind::U16, dst, ..
5563            } => {
5564                self.set_reg_unknown(dst, unknown_counter, u64::from(u16::MAX));
5565            }
5566            BasicInst::LoadAbsolute {
5567                kind: LoadKind::U32, dst, ..
5568            }
5569            | BasicInst::LoadIndirect {
5570                kind: LoadKind::U32, dst, ..
5571            } => {
5572                self.set_reg_unknown(dst, unknown_counter, u64::from(u32::MAX));
5573            }
5574            BasicInst::Prologue { stack_space, regs } => {
5575                let (kind, add_op) = if self.bitness == Bitness::B64 {
5576                    (AnyAnyKind::Add64, OperationKind::Add64)
5577                } else {
5578                    (AnyAnyKind::Add32, OperationKind::Add32)
5579                };
5580
5581                self.set_reg_from_instruction(
5582                    elf,
5583                    imports,
5584                    stack,
5585                    unknown_counter,
5586                    BasicInst::AnyAny {
5587                        kind,
5588                        dst: Reg::SP,
5589                        src1: Reg::SP.into(),
5590                        src2: (cast(stack_space).to_signed() * -1).into(),
5591                    },
5592                );
5593
5594                let sp = self.get_reg(Reg::SP);
5595                for (offset, reg) in regs {
5596                    let Some(key) = add_op.apply(elf, sp, RegValue::Constant(cast(cast(offset).to_signed()).to_i64_sign_extend())) else {
5597                        continue;
5598                    };
5599
5600                    if !matches!(key, RegValue::Reg { .. }) {
5601                        continue;
5602                    }
5603
5604                    let value = self.get_reg(reg);
5605                    stack.insert(key, value);
5606                }
5607            }
5608            BasicInst::Epilogue { stack_space, regs } => {
5609                let (add_kind, add_op, load_kind) = if self.bitness == Bitness::B64 {
5610                    (AnyAnyKind::Add64, OperationKind::Add64, LoadKind::U64)
5611                } else {
5612                    (AnyAnyKind::Add32, OperationKind::Add32, LoadKind::U32)
5613                };
5614
5615                let sp = self.get_reg(Reg::SP);
5616                self.set_reg_from_instruction(
5617                    elf,
5618                    imports,
5619                    stack,
5620                    unknown_counter,
5621                    BasicInst::AnyAny {
5622                        kind: add_kind,
5623                        dst: Reg::SP,
5624                        src1: Reg::SP.into(),
5625                        src2: cast(stack_space).to_signed().into(),
5626                    },
5627                );
5628
5629                let mut restored = [false; Reg::ALL.len()];
5630                for &(offset, reg) in &regs {
5631                    let Some(key) = add_op.apply(elf, sp, RegValue::Constant(cast(cast(offset).to_signed()).to_i64_sign_extend())) else {
5632                        continue;
5633                    };
5634
5635                    if let Some(value) = stack.remove(&key) {
5636                        self.set_reg(reg, value);
5637                        restored[reg.to_usize()] = true;
5638                    }
5639                }
5640
5641                for &(offset, dst) in &regs {
5642                    if restored[dst.to_usize()] {
5643                        continue;
5644                    }
5645
5646                    self.set_reg_from_instruction(
5647                        elf,
5648                        imports,
5649                        stack,
5650                        unknown_counter,
5651                        BasicInst::LoadIndirect {
5652                            kind: load_kind,
5653                            base: Reg::SP,
5654                            dst,
5655                            offset: cast(offset).to_signed(),
5656                        },
5657                    );
5658                }
5659            }
5660            _ => {
5661                for dst in instruction.dst_mask(imports) {
5662                    self.set_reg_unknown(dst, unknown_counter, self.bitness.bits_used_mask());
5663                }
5664            }
5665        }
5666    }
5667}
5668
#[allow(clippy::too_many_arguments)]
/// Runs one round of constant propagation over the basic block `current`:
/// seeds the block's register/stack state from its predecessors, simplifies
/// instructions in place using the tracked register values, folds loads from
/// read-only sections into load-immediates, and finally publishes the block's
/// output register/stack state back into `info_for_block`.
///
/// Returns `true` if anything was modified (either the block's instructions
/// or its recorded input/output state), so the caller knows whether another
/// round is needed.
fn perform_constant_propagation(
    imports: &[Import],
    elf: &Elf,
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    info_for_block: &mut [BlockInfo],
    unknown_counter: &mut u64,
    reachability_graph: &mut ReachabilityGraph,
    mut optimize_queue: Option<&mut VecSet<BlockTarget>>,
    current: BlockTarget,
) -> bool {
    let is_rv64 = elf.is_64();

    // Nothing to do for blocks the reachability graph doesn't know about.
    let Some(reachability) = reachability_graph.for_code.get(&current) else {
        return false;
    };

    if reachability.is_unreachable() {
        return false;
    }

    // Known stack slot contents (keyed by a `RegValue` describing the address).
    let mut stack: HashMap<RegValue, RegValue> = HashMap::new();

    let mut modified = false;
    // If this block is only reachable through a known, small set of static
    // predecessors, we can propagate register values and stack contents that
    // all predecessors agree on into this block's inputs. (The `< 64` cap
    // bounds the amount of work per block.)
    if !reachability.is_dynamically_reachable()
        && !reachability.always_reachable_or_exported()
        && !reachability.reachable_from.is_empty()
        && reachability.reachable_from.len() < 64
    {
        for reg in Reg::ALL {
            // Find a value for `reg` common to the outputs of *all* predecessors.
            let mut common_value_opt = None;
            for &source in &reachability.reachable_from {
                let value = info_for_block[source.index()].output_regs.get_reg(reg);
                if let Some(common_value) = common_value_opt {
                    if common_value == value {
                        continue;
                    }

                    // Predecessors disagree; no common value for this register.
                    common_value_opt = None;
                    break;
                } else {
                    common_value_opt = Some(value);
                }
            }

            if let Some(value) = common_value_opt {
                let old_value = info_for_block[current.index()].input_regs.get_reg(reg);
                if value != old_value {
                    info_for_block[current.index()].input_regs.set_reg(reg, value);
                    modified = true;
                }
            }
        }

        // Same idea for the tracked stack contents: adopt them only if every
        // predecessor's view of the stack is identical.
        let mut common_stack_opt = None;
        for &source in &reachability.reachable_from {
            let source_stack = &info_for_block[source.index()].stack;
            if let Some(ref common_stack) = common_stack_opt {
                if common_stack == source_stack {
                    continue;
                }

                common_stack_opt = None;
                break;
            } else {
                common_stack_opt = Some(source_stack.clone());
            }
        }

        if let Some(common_stack) = common_stack_opt {
            stack = common_stack;
        }
    }

    // Working copy of the register state; starts from the block's inputs.
    let mut regs = info_for_block[current.index()].input_regs.clone();
    for reg in Reg::ALL {
        if let RegValue::Reg {
            direction: Direction::Input,
            source_block,
            ..
        } = regs.get_reg(reg)
        {
            // Reset the input regs in case of loops.
            if source_block == current {
                regs.set_reg(
                    reg,
                    RegValue::Reg {
                        reg,
                        direction: Direction::Input,
                        source_block,
                        bits_used: regs.bitness.bits_used_mask(),
                        addend: 0,
                    },
                )
            }
        }
    }
    // `references` snapshots the block's outgoing references before the first
    // in-place modification so `update_references` can diff them at the end.
    let mut references = BTreeSet::new();
    let mut modified_this_block = false;
    for nth_instruction in 0..all_blocks[current.index()].ops.len() {
        let mut instruction = all_blocks[current.index()].ops[nth_instruction].1.clone();
        if instruction.is_nop() {
            continue;
        }

        // Special-case epilogues: drop restores of registers whose current
        // value already matches what was saved on the stack.
        if let BasicInst::Epilogue {
            regs: ref mut epilogue_regs,
            ..
        } = instruction
        {
            let add_op = if regs.bitness == Bitness::B64 {
                OperationKind::Add64
            } else {
                OperationKind::Add32
            };

            let mut simplified = false;
            let sp = regs.get_reg(Reg::SP);
            epilogue_regs.retain(|&(offset, reg)| {
                if let Some(key) = add_op.apply(elf, sp, RegValue::Constant(cast(cast(offset).to_signed()).to_i64_sign_extend())) {
                    if let Some(&restored_value) = stack.get(&key) {
                        let current_value = regs.get_reg(reg);
                        if current_value == restored_value {
                            // If the register was saved on the stack but we haven't actually modified it then skip the restore.
                            simplified = true;
                            return false;
                        }
                    }
                }

                true
            });

            if simplified {
                if !modified_this_block {
                    references = gather_references(&all_blocks[current.index()]);
                    modified_this_block = true;
                    modified = true;
                }
                all_blocks[current.index()].ops[nth_instruction].1 = instruction.clone();
            }
        }

        // Repeatedly simplify the instruction using the known register values
        // until it reaches a fixed point.
        while let Some(new_instruction) = regs.simplify_instruction(elf, instruction.clone()) {
            log::trace!("Simplifying instruction in {}", all_blocks[current.index()].ops[nth_instruction].0);
            for reg in instruction.src_mask(imports) {
                log::trace!("  {reg:?} = {:?}", regs.get_reg(reg));
            }
            log::trace!("     {instruction:?}");
            log::trace!("  -> {new_instruction:?}");

            if !modified_this_block {
                references = gather_references(&all_blocks[current.index()]);
                modified_this_block = true;
                modified = true;
            }

            instruction = new_instruction.clone();
            all_blocks[current.index()].ops[nth_instruction].1 = new_instruction;
        }

        // Constant-fold loads from allocated, read-only sections: the loaded
        // bytes are known at link time, so replace the load with an immediate.
        if let &BasicInst::LoadAbsolute { kind, dst, target } = &instruction {
            let section = elf.section_by_index(target.section_index);
            if section.is_allocated() && !section.is_writable() {
                let value = match kind {
                    LoadKind::U64 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 8)
                        .map(|xs| u64::from_le_bytes([xs[0], xs[1], xs[2], xs[3], xs[4], xs[5], xs[6], xs[7]]))
                        .map(|x| cast(x).to_signed()),
                    LoadKind::U32 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 4)
                        .map(|xs| u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]))
                        .map(|x| cast(x).to_u64())
                        .map(|x| cast(x).to_signed()),
                    LoadKind::I32 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 4)
                        .map(|xs| i32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]))
                        .map(|x| cast(x).to_i64_sign_extend()),
                    LoadKind::U16 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 2)
                        .map(|xs| u16::from_le_bytes([xs[0], xs[1]]))
                        .map(|x| cast(x).to_u64())
                        .map(|x| cast(x).to_signed()),
                    LoadKind::I16 => section
                        .data()
                        .get(target.offset as usize..target.offset as usize + 2)
                        .map(|xs| i16::from_le_bytes([xs[0], xs[1]]))
                        .map(|x| cast(x).to_i64_sign_extend()),
                    LoadKind::I8 => section
                        .data()
                        .get(target.offset as usize)
                        .map(|&x| cast(x).to_signed())
                        .map(|x| cast(x).to_i64_sign_extend()),
                    LoadKind::U8 => section
                        .data()
                        .get(target.offset as usize)
                        .copied()
                        .map(|x| cast(x).to_u64())
                        .map(|x| cast(x).to_signed()),
                };

                if let Some(imm) = value {
                    if !modified_this_block {
                        references = gather_references(&all_blocks[current.index()]);
                        modified_this_block = true;
                        modified = true;
                    }

                    // Prefer the 32-bit immediate form; only 64-bit targets
                    // can encode immediates that don't fit into an `i32`.
                    if let Ok(imm) = i32::try_from(imm) {
                        instruction = BasicInst::LoadImmediate { dst, imm };
                    } else if is_rv64 {
                        instruction = BasicInst::LoadImmediate64 { dst, imm };
                    } else {
                        unreachable!("load immediate overflow in 32-bit");
                    }

                    all_blocks[current.index()].ops[nth_instruction].1 = instruction.clone();
                }
            }
        }

        // Apply the (possibly rewritten) instruction's effects to the
        // tracked register and stack state.
        regs.set_reg_from_instruction(elf, imports, &mut stack, unknown_counter, instruction.clone());
    }

    // Try to simplify the block's control-flow instruction as well; this may
    // also emit an extra plain instruction that gets appended to the block.
    if let Some((extra_instruction, new_instruction)) = regs.simplify_control_instruction(elf, all_blocks[current.index()].next.instruction)
    {
        log::trace!("Simplifying end of {current:?}");
        log::trace!("     {:?}", all_blocks[current.index()].next.instruction);
        if let Some(ref extra_instruction) = extra_instruction {
            log::trace!("  -> {extra_instruction:?}");
        }
        log::trace!("  -> {new_instruction:?}");

        if !modified_this_block {
            references = gather_references(&all_blocks[current.index()]);
            modified_this_block = true;
            modified = true;
        }

        if let Some(extra_instruction) = extra_instruction {
            regs.set_reg_from_instruction(elf, imports, &mut stack, unknown_counter, extra_instruction.clone());

            all_blocks[current.index()]
                .ops
                .push((all_blocks[current.index()].next.source.clone(), extra_instruction));
        }
        all_blocks[current.index()].next.instruction = new_instruction;
    }

    // Fold the control instruction's effects into the register state too.
    regs.set_reg_from_control_instruction(
        elf,
        imports,
        &mut stack,
        unknown_counter,
        all_blocks[current.index()].next.instruction,
    );

    // Convert leftover `Unknown` values into symbolic outputs of this block,
    // preserving the `bits_used` information.
    for reg in Reg::ALL {
        if let RegValue::Unknown { bits_used, .. } = regs.get_reg(reg) {
            regs.set_reg(
                reg,
                RegValue::Reg {
                    reg,
                    direction: Direction::Output,
                    source_block: current,
                    bits_used,
                    addend: 0,
                },
            )
        }
    }

    // Unknown stack slot contents carry no information; drop them.
    stack.retain(|_, value| !matches!(value, RegValue::Unknown { .. }));

    // Publish the new output state and note whether it changed.
    let output_regs_modified = info_for_block[current.index()].output_regs != regs;
    if output_regs_modified {
        info_for_block[current.index()].output_regs = regs.clone();
        modified = true;
    }

    let stack_modified = info_for_block[current.index()].stack != stack;
    if stack_modified {
        info_for_block[current.index()].stack = stack;
        modified = true;
    }

    if modified_this_block {
        // Reconcile the reachability graph with the block's (possibly changed)
        // outgoing references, and queue the block for further optimization.
        update_references(all_blocks, reachability_graph, optimize_queue.as_deref_mut(), current, references);
        if reachability_graph.is_code_reachable(current) {
            if let Some(ref mut optimize_queue) = optimize_queue {
                add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, current);
            }
        }
    }

    // If this block's outputs changed, its static successors may now be
    // simplifiable as well; requeue them.
    if let Some(ref mut optimize_queue) = optimize_queue {
        if output_regs_modified || stack_modified {
            match all_blocks[current.index()].next.instruction {
                ControlInst::Jump { target } => add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, target),
                ControlInst::Branch {
                    target_true, target_false, ..
                } => {
                    add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, target_true);
                    add_to_optimize_queue(all_blocks, reachability_graph, optimize_queue, target_false);
                }
                // `Call`s were lowered away before this pass runs.
                ControlInst::Call { .. } => unreachable!(),
                _ => {}
            }
        }
    }

    modified
}
5986
5987fn perform_load_address_and_jump_fusion(all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>], reachability_graph: &ReachabilityGraph) {
5988    let used_blocks: Vec<_> = (0..all_blocks.len())
5989        .map(BlockTarget::from_raw)
5990        .filter(|&block_target| reachability_graph.is_code_reachable(block_target))
5991        .collect();
5992
5993    for window in used_blocks.windows(2) {
5994        let (current, next) = (window[0], window[1]);
5995        let Some(&(
5996            _,
5997            BasicInst::LoadAddress {
5998                dst,
5999                target: AnyTarget::Code(target_return),
6000            },
6001        )) = all_blocks[current.index()].ops.last()
6002        else {
6003            continue;
6004        };
6005
6006        if target_return != next {
6007            continue;
6008        }
6009
6010        all_blocks[current.index()].next.instruction = match all_blocks[current.index()].next.instruction {
6011            ControlInst::Jump { target } => ControlInst::Call {
6012                target,
6013                target_return,
6014                ra: dst,
6015            },
6016            ControlInst::JumpIndirect { base, offset } if dst != base => ControlInst::CallIndirect {
6017                base,
6018                offset,
6019                target_return,
6020                ra: dst,
6021            },
6022            _ => {
6023                continue;
6024            }
6025        };
6026
6027        all_blocks[current.index()].ops.pop();
6028    }
6029}
6030
6031fn find_terminator(
6032    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
6033    seen: &mut HashSet<BlockTarget>,
6034    terminator_for: &mut [Option<Terminator>],
6035    resolved_queue: &mut Vec<BlockTarget>,
6036    unresolved_set: &mut BTreeSet<BlockTarget>,
6037    current: BlockTarget,
6038) -> Terminator {
6039    if let Some(ref terminator) = terminator_for[current.index()] {
6040        return terminator.clone();
6041    }
6042
6043    if !seen.insert(current) {
6044        return Terminator::One(TerminatorKind::InfiniteLoop);
6045    }
6046
6047    let block = &all_blocks[current.index()];
6048    let terminator = match block.next.instruction {
6049        ControlInst::Jump { target } | ControlInst::Call { target, .. } => {
6050            find_terminator(all_blocks, seen, terminator_for, resolved_queue, unresolved_set, target)
6051        }
6052        ControlInst::JumpIndirect { base, offset } | ControlInst::CallIndirect { base, offset, .. } => {
6053            Terminator::One(TerminatorKind::JumpIndirect {
6054                block: current,
6055                base,
6056                offset,
6057            })
6058        }
6059        ControlInst::Branch {
6060            target_true, target_false, ..
6061        } => {
6062            let lhs = find_terminator(all_blocks, seen, terminator_for, resolved_queue, unresolved_set, target_true);
6063            let rhs = find_terminator(all_blocks, seen, terminator_for, resolved_queue, unresolved_set, target_false);
6064            Terminator::merge(lhs, rhs)
6065        }
6066        ControlInst::Unimplemented => Terminator::One(TerminatorKind::Unimplemented),
6067    };
6068
6069    seen.remove(&current);
6070    terminator_for[current.index()] = Some(terminator.clone());
6071    resolved_queue.push(current);
6072    unresolved_set.remove(&current);
6073
6074    terminator
6075}
6076
/// Computes a `Terminator` for every basic block, describing how control flow
/// starting at that block eventually ends (indirect jump, unimplemented, or
/// an infinite loop), using a worklist-based fixpoint with a recursive
/// fallback (`find_terminator`) for blocks the propagation can't resolve.
fn gather_terminators(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>]) -> Vec<Terminator> {
    // Reverse edges: for each block, the blocks which statically jump to it.
    let mut blocks_which_jump_to: Vec<Vec<BlockTarget>> = vec![Vec::new(); all_blocks.len()];
    // Memoized terminator per block; `None` until resolved.
    let mut terminator_for: Vec<Option<Terminator>> = vec![None; all_blocks.len()];
    // Blocks whose terminator was just resolved; their predecessors may now
    // be resolvable too.
    let mut resolved_queue = Vec::new();
    // Blocks still waiting on a successor's terminator.
    let mut unresolved_set = BTreeSet::new();
    // Branch blocks with exactly one resolved successor, deferred for the
    // recursive fallback.
    let mut branch_queue = Vec::new();
    // Seed the worklist: resolve trivially-terminal blocks immediately and
    // build the reverse-edge lists for the rest.
    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
        match all_blocks[current.index()].next.instruction {
            ControlInst::Jump { target } | ControlInst::Call { target, .. } => {
                if target == current {
                    // A block jumping to itself is an infinite loop.
                    terminator_for[current.index()] = Some(Terminator::One(TerminatorKind::InfiniteLoop));
                    resolved_queue.push(current);
                } else {
                    blocks_which_jump_to[target.index()].push(current);
                    unresolved_set.insert(current);
                }
            }
            ControlInst::JumpIndirect { base, offset } | ControlInst::CallIndirect { base, offset, .. } => {
                // Indirect control flow is terminal by definition.
                terminator_for[current.index()] = Some(Terminator::One(TerminatorKind::JumpIndirect {
                    block: current,
                    base,
                    offset,
                }));
                resolved_queue.push(current);
            }
            ControlInst::Branch {
                target_true, target_false, ..
            } => {
                if target_true == current && target_false == current {
                    // Both edges loop back to the block itself.
                    terminator_for[current.index()] = Some(Terminator::One(TerminatorKind::InfiniteLoop));
                    resolved_queue.push(current);
                    continue;
                }

                // Only record the non-self edge(s); a self edge can never
                // supply new information.
                if target_true == current {
                    blocks_which_jump_to[target_false.index()].push(current);
                } else if target_false == current {
                    blocks_which_jump_to[target_true.index()].push(current);
                }
                unresolved_set.insert(current);
            }
            ControlInst::Unimplemented => {
                terminator_for[current.index()] = Some(Terminator::One(TerminatorKind::Unimplemented));
                resolved_queue.push(current);
            }
        }
    }

    // Iterate until every block is resolved.
    while !resolved_queue.is_empty() || !branch_queue.is_empty() || !unresolved_set.is_empty() {
        // Phase 1: propagate freshly resolved terminators backwards along the
        // reverse edges.
        while let Some(expected_target) = resolved_queue.pop() {
            for &current in &blocks_which_jump_to[expected_target.index()] {
                match all_blocks[current.index()].next.instruction {
                    ControlInst::Jump { target } | ControlInst::Call { target, .. } => {
                        assert_eq!(target, expected_target);
                        let terminator = terminator_for[expected_target.index()].clone();
                        assert!(terminator.is_some());
                        terminator_for[current.index()] = terminator;
                        resolved_queue.push(current);
                        unresolved_set.remove(&current);
                    }
                    ControlInst::Branch {
                        target_true, target_false, ..
                    } => {
                        // The edge we came through is resolved by construction;
                        // check whether the other edge is resolved too.
                        let (target_terminator, other_target_terminator) = if target_true == expected_target {
                            (
                                terminator_for[target_true.index()].as_ref().unwrap(),
                                terminator_for[target_false.index()].as_ref(),
                            )
                        } else if target_false == expected_target {
                            (
                                terminator_for[target_false.index()].as_ref().unwrap(),
                                terminator_for[target_true.index()].as_ref(),
                            )
                        } else {
                            unreachable!()
                        };

                        if let Some(other_target_terminator) = other_target_terminator {
                            terminator_for[current.index()] =
                                Some(Terminator::merge(target_terminator.clone(), other_target_terminator.clone()));
                            resolved_queue.push(current);
                            unresolved_set.remove(&current);
                            continue;
                        }

                        // Only one side resolved; defer to the recursive pass.
                        branch_queue.push(current);
                    }
                    ControlInst::JumpIndirect { .. } | ControlInst::CallIndirect { .. } | ControlInst::Unimplemented => {
                        // Such blocks were resolved during seeding and never
                        // appear in a reverse-edge list.
                        unreachable!()
                    }
                }
            }
        }

        // Phase 2: branches with a single unresolved side — resolve that side
        // recursively (which may enqueue new resolved blocks).
        while let Some(current) = branch_queue.pop() {
            let ControlInst::Branch {
                target_true, target_false, ..
            } = all_blocks[current.index()].next.instruction
            else {
                unreachable!()
            };
            let terminator_true = terminator_for[target_true.index()].as_ref();
            let terminator_false = terminator_for[target_false.index()].as_ref();
            if terminator_true.is_some() && terminator_false.is_some() {
                // Already resolved meanwhile by phase 1.
                continue;
            }

            assert!((terminator_true.is_none() && terminator_false.is_some()) || (terminator_true.is_some() && terminator_false.is_none()));
            find_terminator(
                all_blocks,
                &mut HashSet::new(),
                &mut terminator_for,
                &mut resolved_queue,
                &mut unresolved_set,
                current,
            );
        }

        // Phase 3: if propagation stalled (e.g. due to cycles), force-resolve
        // unresolved blocks one at a time until the queue refills.
        while resolved_queue.is_empty() {
            let Some(current) = unresolved_set.pop_first() else { break };
            find_terminator(
                all_blocks,
                &mut HashSet::new(),
                &mut terminator_for,
                &mut resolved_queue,
                &mut unresolved_set,
                current,
            );
        }
    }

    // At this point every block must have a terminator assigned.
    terminator_for.into_iter().map(Option::unwrap).collect()
}
6210
6211#[deny(clippy::as_conversions)]
6212fn optimize_program(
6213    config: &Config,
6214    elf: &Elf,
6215    isa: InstructionSetKind,
6216    imports: &[Import],
6217    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
6218    reachability_graph: &mut ReachabilityGraph,
6219    exports: &mut [Export],
6220) -> Vec<BlockInfo> {
6221    let bitness = if elf.is_64() { Bitness::B64 } else { Bitness::B32 };
6222
6223    let mut optimize_queue = VecSet::new();
6224    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
6225        if !reachability_graph.is_code_reachable(current) {
6226            all_blocks[current.index()].ops.clear();
6227            all_blocks[current.index()].next.instruction = ControlInst::Unimplemented;
6228            continue;
6229        }
6230
6231        perform_nop_elimination(all_blocks, current);
6232
6233        let block = &mut all_blocks[current.index()];
6234        block.next.instruction = match block.next.instruction {
6235            ControlInst::Call { ra, target, target_return } => {
6236                block.ops.push((
6237                    block.next.source.clone(),
6238                    BasicInst::LoadAddress {
6239                        dst: ra,
6240                        target: AnyTarget::Code(target_return),
6241                    },
6242                ));
6243                ControlInst::Jump { target }
6244            }
6245            ControlInst::CallIndirect {
6246                ra,
6247                target_return,
6248                base,
6249                offset,
6250            } if ra != base => {
6251                block.ops.push((
6252                    block.next.source.clone(),
6253                    BasicInst::LoadAddress {
6254                        dst: ra,
6255                        target: AnyTarget::Code(target_return),
6256                    },
6257                ));
6258                ControlInst::JumpIndirect { base, offset }
6259            }
6260            instruction => instruction,
6261        };
6262
6263        optimize_queue.push(current);
6264    }
6265
6266    let terminators = gather_terminators(all_blocks);
6267    let mut unknown_counter = 0;
6268    let mut info_for_block = Vec::with_capacity(all_blocks.len());
6269    for (current, terminator) in (0..all_blocks.len()).map(BlockTarget::from_raw).zip(terminators.into_iter()) {
6270        info_for_block.push(BlockInfo {
6271            input_regs: BlockRegs::new_input(bitness, current),
6272            output_regs: BlockRegs::new_output(bitness, current),
6273            registers_needed: RegMask::all(),
6274            stack: HashMap::new(),
6275            terminator,
6276        });
6277    }
6278
6279    if isa.supports_opcode(Opcode::unlikely) {
6280        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
6281            let ControlInst::Branch {
6282                target_true, target_false, ..
6283            } = all_blocks[current.index()].next.instruction
6284            else {
6285                continue;
6286            };
6287
6288            let terminator_true = &info_for_block[target_true.index()].terminator;
6289            let terminator_false = &info_for_block[target_false.index()].terminator;
6290
6291            if matches!(terminator_true, Terminator::One(TerminatorKind::Unimplemented)) {
6292                all_blocks[target_true.index()].is_unlikely = true;
6293                continue;
6294            }
6295
6296            if matches!(terminator_false, Terminator::One(TerminatorKind::Unimplemented)) {
6297                all_blocks[target_false.index()].is_unlikely = true;
6298                continue;
6299            }
6300        }
6301    }
6302
6303    let mut count_inline = 0;
6304    let mut count_dce = 0;
6305    let mut count_cp = 0;
6306
6307    let mut inline_history: HashSet<(BlockTarget, BlockTarget)> = HashSet::new(); // Necessary to prevent infinite loops.
6308    macro_rules! run_optimizations {
6309        ($current:expr, $optimize_queue:expr) => {{
6310            let mut modified = false;
6311            if reachability_graph.is_code_reachable($current) {
6312                perform_nop_elimination(all_blocks, $current);
6313
6314                if perform_inlining(
6315                    all_blocks,
6316                    reachability_graph,
6317                    exports,
6318                    $optimize_queue,
6319                    &mut inline_history,
6320                    config.inline_threshold,
6321                    $current,
6322                ) {
6323                    count_inline += 1;
6324                    modified |= true;
6325                }
6326
6327                if perform_dead_code_elimination(
6328                    config,
6329                    imports,
6330                    all_blocks,
6331                    &mut info_for_block,
6332                    reachability_graph,
6333                    $optimize_queue,
6334                    $current,
6335                ) {
6336                    count_dce += 1;
6337                    modified |= true;
6338                }
6339
6340                if perform_constant_propagation(
6341                    imports,
6342                    elf,
6343                    all_blocks,
6344                    &mut info_for_block,
6345                    &mut unknown_counter,
6346                    reachability_graph,
6347                    $optimize_queue,
6348                    $current,
6349                ) {
6350                    count_cp += 1;
6351                    modified |= true;
6352                }
6353            }
6354
6355            modified
6356        }};
6357    }
6358
6359    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
6360        run_optimizations!(current, None);
6361    }
6362
6363    garbage_collect_reachability(all_blocks, reachability_graph);
6364
6365    let timestamp = std::time::Instant::now();
6366    let mut opt_iteration_count = 0;
6367    while let Some(current) = optimize_queue.pop_non_unique() {
6368        loop {
6369            if !run_optimizations!(current, Some(&mut optimize_queue)) {
6370                break;
6371            }
6372        }
6373        opt_iteration_count += 1;
6374    }
6375
6376    log::debug!(
6377        "Optimizing the program took {opt_iteration_count} iteration(s) and {}ms",
6378        timestamp.elapsed().as_millis()
6379    );
6380    log::debug!("             Inlinining: {count_inline}");
6381    log::debug!("  Dead code elimination: {count_dce}");
6382    log::debug!("   Constant propagation: {count_cp}");
6383    garbage_collect_reachability(all_blocks, reachability_graph);
6384
6385    inline_history.clear();
6386    count_inline = 0;
6387    count_dce = 0;
6388    count_cp = 0;
6389
6390    for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
6391        perform_meta_instruction_lowering(elf.is_64(), all_blocks, current);
6392    }
6393
6394    let timestamp = std::time::Instant::now();
6395    let mut opt_brute_force_iterations = 0;
6396    let mut modified = true;
6397    while modified {
6398        opt_brute_force_iterations += 1;
6399        modified = false;
6400        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
6401            modified |= run_optimizations!(current, Some(&mut optimize_queue));
6402        }
6403
6404        while let Some(current) = optimize_queue.pop_non_unique() {
6405            loop {
6406                if !run_optimizations!(current, Some(&mut optimize_queue)) {
6407                    break;
6408                }
6409            }
6410        }
6411
6412        if modified {
6413            garbage_collect_reachability(all_blocks, reachability_graph);
6414        }
6415    }
6416
6417    perform_load_address_and_jump_fusion(all_blocks, reachability_graph);
6418
6419    log::debug!(
6420        "Optimizing the program took {} brute force iteration(s) and {} ms",
6421        opt_brute_force_iterations - 1,
6422        timestamp.elapsed().as_millis()
6423    );
6424    log::debug!("             Inlinining: {count_inline}");
6425    log::debug!("  Dead code elimination: {count_dce}");
6426    log::debug!("   Constant propagation: {count_cp}");
6427
6428    info_for_block
6429}
6430
#[cfg(test)]
mod test {
    use super::*;
    use polkavm::Reg;

    /// Builds small test programs directly in the linker's internal
    /// instruction representation (optionally populated from PolkaVM
    /// assembly), so the optimization passes can be exercised without
    /// going through a real ELF input.
    struct ProgramBuilder {
        // Section which absolute loads/stores in the test programs target.
        data_section: SectionIndex,
        // Section that `push` currently appends instructions to.
        current_section: SectionIndex,
        // Next index handed out by `add_section`.
        next_free_section: SectionIndex,
        // Per-section write offset; `push` advances it by 4 per instruction.
        next_offset_for_section: HashMap<SectionIndex, u64>,
        // Instructions collected so far, each paired with a synthetic source range.
        instructions: Vec<(Source, InstExt<SectionTarget, SectionTarget>)>,
        exports: Vec<Export>,
    }

    /// A fully built test program: its disassembly plus a ready-to-run
    /// interpreter instance pointed at the `main` export.
    struct TestProgram {
        disassembly: String,
        instance: polkavm::RawInstance,
    }

    impl ProgramBuilder {
        fn new() -> Self {
            // Section 0 is reserved for data; code sections start at 1.
            ProgramBuilder {
                data_section: SectionIndex::new(0),
                current_section: SectionIndex::new(1),
                next_free_section: SectionIndex::new(1),
                next_offset_for_section: HashMap::default(),
                instructions: Vec::new(),
                exports: Vec::new(),
            }
        }

        /// Convenience constructor: a fresh builder pre-populated from assembly.
        fn from_assembly(assembly: &str) -> Self {
            let mut b = Self::new();
            b.append_assembly(assembly);
            b
        }

        /// Registers an export at `location` with the given register counts.
        fn add_export(&mut self, name: impl AsRef<[u8]>, input_regs: u8, output_regs: u8, location: SectionTarget) {
            self.exports.push(Export {
                location,
                metadata: ExternMetadata {
                    index: None,
                    symbol: name.as_ref().to_owned(),
                    input_regs,
                    output_regs,
                },
            })
        }

        /// Allocates a fresh, empty section and returns a target at its start.
        fn add_section(&mut self) -> SectionTarget {
            let index = self.next_free_section;
            self.next_offset_for_section.insert(index, 0);
            self.next_free_section = SectionIndex::new(index.raw() + 1);
            SectionTarget {
                section_index: index,
                offset: 0,
            }
        }

        /// Makes `section_index` the section that subsequent `push` calls append to.
        fn switch_section(&mut self, section_index: impl Into<SectionIndex>) {
            self.current_section = section_index.into();
        }

        /// Synthesizes a 4-byte source range at the current write position
        /// of the current section.
        fn current_source(&self) -> Source {
            let next_offset = self.next_offset_for_section.get(&self.current_section).copied().unwrap_or(0);
            Source {
                section_index: self.current_section,
                offset_range: (next_offset..next_offset + 4).into(),
            }
        }

        /// Appends an instruction to the current section and returns the
        /// section target at which it was placed.
        fn push(&mut self, inst: impl Into<InstExt<SectionTarget, SectionTarget>>) -> SectionTarget {
            let source = self.current_source();
            *self.next_offset_for_section.get_mut(&self.current_section).unwrap() += 4;
            self.instructions.push((source, inst.into()));
            source.begin()
        }

        /// Assembles `assembly` with the PolkaVM assembler and translates the
        /// resulting blob into the linker's internal representation.
        ///
        /// Works in three passes: (1) allocate one section per basic block and
        /// push `Nop` placeholders while recording where every program counter
        /// lands, (2) overwrite each placeholder with the translated
        /// instruction, and (3) re-register the blob's exports.
        fn append_assembly(&mut self, assembly: &str) {
            let isa = InstructionSetKind::Latest32;
            let raw_blob = polkavm_common::assembler::assemble(Some(isa), assembly).unwrap();
            let blob = ProgramBlob::parse(raw_blob.into()).unwrap();
            let mut program_counter_to_section_target = HashMap::new();
            let mut program_counter_to_instruction_index = HashMap::new();
            let mut in_new_block = true;
            for instruction in blob.instructions() {
                if in_new_block {
                    // Each basic block gets its own section.
                    let block = self.add_section();
                    self.switch_section(block);
                    program_counter_to_section_target.insert(instruction.offset, block);
                    in_new_block = false;
                }

                // Placeholder; replaced with the real instruction in the second pass.
                program_counter_to_instruction_index.insert(instruction.offset, self.instructions.len());
                self.push(BasicInst::Nop);

                if instruction.kind.starts_new_basic_block() {
                    in_new_block = true;
                }
            }

            for instruction in blob.instructions() {
                let out = &mut self.instructions[*program_counter_to_instruction_index.get(&instruction.offset).unwrap()].1;
                match instruction.kind {
                    Instruction::fallthrough => {
                        let target = *program_counter_to_section_target.get(&instruction.next_offset).unwrap();
                        *out = ControlInst::Jump { target }.into();
                    }
                    Instruction::jump(target) => {
                        let target = *program_counter_to_section_target.get(&polkavm::ProgramCounter(target)).unwrap();
                        *out = ControlInst::Jump { target }.into();
                    }
                    Instruction::load_imm(dst, imm) => {
                        *out = BasicInst::LoadImmediate {
                            dst: dst.into(),
                            imm: cast(imm).to_signed(),
                        }
                        .into();
                    }
                    Instruction::add_imm_32(dst, src, imm) => {
                        *out = BasicInst::AnyAny {
                            kind: AnyAnyKind::Add32,
                            dst: dst.into(),
                            src1: src.into(),
                            src2: cast(imm).to_signed().into(),
                        }
                        .into();
                    }
                    Instruction::add_32(dst, src1, src2) => {
                        *out = BasicInst::AnyAny {
                            kind: AnyAnyKind::Add32,
                            dst: dst.into(),
                            src1: src1.into(),
                            src2: src2.into(),
                        }
                        .into();
                    }
                    Instruction::branch_less_unsigned_imm(src1, src2, target) | Instruction::branch_eq_imm(src1, src2, target) => {
                        let target_true = *program_counter_to_section_target.get(&polkavm::ProgramCounter(target)).unwrap();
                        let target_false = *program_counter_to_section_target.get(&instruction.next_offset).unwrap();
                        *out = ControlInst::Branch {
                            kind: match instruction.kind {
                                Instruction::branch_less_unsigned_imm(..) => BranchKind::LessUnsigned32,
                                Instruction::branch_eq_imm(..) => BranchKind::Eq32,
                                _ => unreachable!(),
                            },
                            src1: src1.into(),
                            src2: cast(src2).to_signed().into(),
                            target_true,
                            target_false,
                        }
                        .into();
                    }
                    Instruction::jump_indirect(base, 0) => {
                        *out = ControlInst::JumpIndirect {
                            base: base.into(),
                            offset: 0,
                        }
                        .into();
                    }
                    Instruction::trap => {
                        *out = ControlInst::Unimplemented.into();
                    }
                    Instruction::store_u32(src, address) => {
                        *out = BasicInst::StoreAbsolute {
                            kind: StoreKind::U32,
                            src: src.into(),
                            target: SectionTarget {
                                section_index: self.data_section,
                                offset: u64::from(address),
                            },
                        }
                        .into();
                    }
                    Instruction::store_indirect_u32(src, base, offset) => {
                        *out = BasicInst::StoreIndirect {
                            kind: StoreKind::U32,
                            src: src.into(),
                            base: base.into(),
                            offset: cast(offset).to_signed(),
                        }
                        .into();
                    }
                    // Only the opcodes exercised by the tests below are translated.
                    _ => unimplemented!("{instruction:?}"),
                }
            }

            for export in blob.exports() {
                // Hard-coded register counts for test exports.
                let input_regs = 1;
                let output_regs = 1;
                let target = program_counter_to_section_target.get(&export.program_counter()).unwrap();
                self.add_export(export.symbol().as_bytes(), input_regs, output_regs, *target);
            }
        }

        /// Runs the collected instructions through the linker pipeline
        /// (basic-block splitting, reachability, optional optimization, code
        /// emission) and returns the disassembly together with an
        /// instantiated, gas-metered interpreter positioned at the `main` export.
        fn build(&self, config: Config) -> TestProgram {
            let isa = InstructionSetKind::Latest32;
            let elf = Elf::default();
            let data_sections_set: HashSet<_> = core::iter::once(self.data_section).collect();
            let code_sections_set: HashSet<_> = self.next_offset_for_section.keys().copied().collect();
            let relocations = BTreeMap::default();
            let imports = [];
            let mut exports = self.exports.clone();

            // TODO: Refactor the main code so that we don't have to copy-paste this here.
            let all_jump_targets = harvest_all_jump_targets(
                &elf,
                &data_sections_set,
                &code_sections_set,
                &self.instructions,
                &relocations,
                &exports,
            )
            .unwrap();

            let all_blocks = split_code_into_basic_blocks(&elf, &Default::default(), &all_jump_targets, self.instructions.clone()).unwrap();
            let mut section_to_block = build_section_to_block_map(&all_blocks).unwrap();
            let mut all_blocks = resolve_basic_block_references(&data_sections_set, &section_to_block, &all_blocks).unwrap();
            let mut reachability_graph =
                calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations).unwrap();
            // Optimizations only run at O2 and above.
            if matches!(config.opt_level, OptLevel::O2 | OptLevel::Oexperimental) {
                optimize_program(&config, &elf, isa, &imports, &mut all_blocks, &mut reachability_graph, &mut exports);
            }
            let mut used_blocks = collect_used_blocks(&all_blocks, &reachability_graph);

            if matches!(config.opt_level, OptLevel::O2 | OptLevel::Oexperimental) {
                used_blocks = add_missing_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, used_blocks);
                merge_consecutive_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, &mut section_to_block, &mut used_blocks);
                replace_immediates_with_registers(&mut all_blocks, &imports, &used_blocks);
            }

            // Sanity check: the incrementally maintained reachability graph
            // must match one recomputed from scratch.
            let expected_reachability_graph =
                calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations).unwrap();
            assert!(reachability_graph == expected_reachability_graph);

            let used_imports = HashSet::new();
            let mut base_address_for_section = HashMap::new();
            base_address_for_section.insert(self.data_section, 0);
            let section_got = self.next_free_section;
            let target_to_got_offset = HashMap::new();

            let (jump_table, jump_target_for_block) = build_jump_table(all_blocks.len(), &used_blocks, &reachability_graph);
            let (code, _) = emit_code(
                &Default::default(),
                &imports,
                &base_address_for_section,
                section_got,
                &target_to_got_offset,
                &all_blocks,
                &used_blocks,
                &used_imports,
                &jump_target_for_block,
                true,
                false,
                0,
            )
            .unwrap();

            let mut builder = ProgramBlobBuilder::new(isa);

            // Re-register every export against the jump target its block was
            // assigned, and verify that none were dropped along the way.
            let mut export_count = 0;
            for current in used_blocks {
                for &export_index in &reachability_graph.for_code.get(&current).unwrap().exports {
                    let export = &exports[export_index];
                    let jump_target = jump_target_for_block[current.index()]
                        .expect("internal error: export metadata points to a block without a jump target assigned");

                    builder.add_export_by_basic_block(jump_target.static_target, &export.metadata.symbol);
                    export_count += 1;
                }
            }
            assert_eq!(export_count, exports.len());

            let mut raw_code = Vec::with_capacity(code.len());
            for (_, inst) in code {
                raw_code.push(inst);
            }

            builder.set_code(&raw_code, &jump_table);
            builder.set_rw_data_size(1);

            // Disassemble the finished blob; the tests compare against this text.
            let blob = ProgramBlob::parse(builder.to_vec().unwrap().into()).unwrap();
            let mut disassembler = polkavm_disassembler::Disassembler::new(&blob, polkavm_disassembler::DisassemblyFormat::Guest).unwrap();
            disassembler.emit_header(false);
            disassembler.show_offsets(false);
            let mut buf = Vec::new();
            disassembler.disassemble_into(&mut buf).unwrap();
            let disassembly = String::from_utf8(buf).unwrap();

            // Run under the interpreter with synchronous gas metering,
            // starting at the `main` export.
            let mut config = polkavm::Config::from_env().unwrap();
            config.set_backend(Some(polkavm::BackendKind::Interpreter));
            let engine = polkavm::Engine::new(&config).unwrap();
            let mut module_config = polkavm::ModuleConfig::default();
            module_config.set_gas_metering(Some(polkavm::GasMeteringKind::Sync));
            let module = polkavm::Module::from_blob(&engine, &module_config, blob).unwrap();
            let mut instance = module.instantiate().unwrap();
            instance.set_gas(10000);
            instance.set_reg(polkavm::Reg::RA, polkavm::RETURN_TO_HOST);
            let pc = module.exports().find(|export| export.symbol() == "main").unwrap().program_counter();
            instance.set_next_program_counter(pc);

            TestProgram { disassembly, instance }
        }

        /// Builds the program at `O0` and `O2`, runs both with `run`, calls
        /// `check` with the (optimized, unoptimized) instances, and finally
        /// diffs the optimized disassembly against `expected_disassembly`.
        ///
        /// Passing the literal string "TODO" as the expected disassembly skips
        /// the comparison and panics via `todo!()` at the end — useful while
        /// writing a new test.
        fn test_optimize(
            &self,
            mut run: impl FnMut(&mut polkavm::RawInstance),
            mut check: impl FnMut(&mut polkavm::RawInstance, &mut polkavm::RawInstance),
            expected_disassembly: &str,
        ) {
            let mut unopt = self.build(Config {
                opt_level: OptLevel::O0,
                ..Config::default()
            });
            let mut opt = self.build(Config {
                opt_level: OptLevel::O2,
                ..Config::default()
            });

            log::info!("Unoptimized disassembly:\n{}", unopt.disassembly);
            log::info!("Optimized disassembly:\n{}", opt.disassembly);

            run(&mut unopt.instance);
            run(&mut opt.instance);

            check(&mut opt.instance, &mut unopt.instance);

            // Trims every line and indents all non-label (`@`) lines uniformly
            // so the comparison ignores incidental whitespace differences.
            fn normalize(s: &str) -> String {
                let mut out = String::new();
                for line in s.trim().lines() {
                    if !line.trim().starts_with('@') {
                        out.push_str("    ");
                    }
                    out.push_str(line.trim());
                    out.push('\n');
                }
                out
            }

            let is_todo = expected_disassembly.trim() == "TODO";
            let actual_normalized = normalize(&opt.disassembly);
            let expected_normalized = normalize(expected_disassembly);
            if actual_normalized != expected_normalized && !is_todo {
                // Build a colorized side-by-side diff for the failure message.
                use core::fmt::Write;
                let mut output_actual = String::new();
                let mut output_expected = String::new();
                for diff in diff::lines(&actual_normalized, &expected_normalized) {
                    match diff {
                        diff::Result::Left(line) => {
                            writeln!(&mut output_actual, "{}", yansi::Paint::red(line)).unwrap();
                        }
                        diff::Result::Both(line, _) => {
                            writeln!(&mut output_actual, "{}", line).unwrap();
                            writeln!(&mut output_expected, "{}", line).unwrap();
                        }
                        diff::Result::Right(line) => {
                            writeln!(&mut output_expected, "{}", line).unwrap();
                        }
                    }
                }

                {
                    use std::io::Write;
                    let stderr = std::io::stderr();
                    let mut stderr = stderr.lock();

                    writeln!(&mut stderr, "Optimization test failed!\n").unwrap();
                    writeln!(&mut stderr, "Expected optimized:").unwrap();
                    writeln!(&mut stderr, "{output_expected}").unwrap();
                    writeln!(&mut stderr, "Actual optimized:").unwrap();
                    writeln!(&mut stderr, "{output_actual}").unwrap();
                }

                panic!("optimized program is not what we've expected")
            }

            if is_todo {
                todo!();
            }
        }

        /// One-shot helper: build from assembly and immediately run `test_optimize`.
        fn test_optimize_oneshot(
            assembly: &str,
            expected_disassembly: &str,
            run: impl FnMut(&mut polkavm::RawInstance),
            check: impl FnMut(&mut polkavm::RawInstance, &mut polkavm::RawInstance),
        ) {
            let _ = env_logger::try_init();
            let b = ProgramBuilder::from_assembly(assembly);
            b.test_optimize(run, check, expected_disassembly);
        }
    }

    /// Asserts that running the instance to completion finishes cleanly.
    fn expect_finished(i: &mut polkavm::RawInstance) {
        assert!(matches!(i.run().unwrap(), polkavm::InterruptKind::Finished));
    }

    /// Returns a checker asserting that the unoptimized run (`b`) produced the
    /// given register values and that the optimized run (`a`) matches it
    /// register-for-register.
    fn expect_regs(regs: impl IntoIterator<Item = (Reg, u64)> + Clone) -> impl FnMut(&mut polkavm::RawInstance, &mut polkavm::RawInstance) {
        move |a: &mut polkavm::RawInstance, b: &mut polkavm::RawInstance| {
            for (reg, value) in regs.clone() {
                assert_eq!(b.reg(reg), value);
                assert_eq!(a.reg(reg), b.reg(reg));
            }
        }
    }

    #[test]
    fn test_optimize_01_empty_block_elimination() {
        ProgramBuilder::test_optimize_oneshot(
            "
            pub @main:
                jump @loop
            @before_loop:
                jump @loop
            @loop:
                i32 a0 = a0 + 0x1
                jump @before_loop if a0 <u 10
                ret
            ",
            "
            @0 [export #0: 'main']
                a0 = a0 + 0x1
                jump @0 if a0 <u 10
            @1
                ret
            ",
            expect_finished,
            expect_regs([(Reg::A0, 10)]),
        )
    }

    #[test]
    fn test_optimize_02_simple_constant_propagation() {
        ProgramBuilder::test_optimize_oneshot(
            "
            pub @main:
                a1 = 0
                i32 a1 = a1 + 1
            @loop:
                i32 a0 = a0 + a1
                jump @loop if a0 <u 10
                ret
            ",
            "
            @0 [export #0: 'main']
                a1 = 0x1
                fallthrough
            @1
                a0 = a0 + a1
                jump @1 if a0 <u 10
            @2
                ret
            ",
            expect_finished,
            expect_regs([(Reg::A0, 10), (Reg::A1, 1)]),
        )
    }

    #[test]
    fn test_optimize_03_simple_dead_code_elimination() {
        ProgramBuilder::test_optimize_oneshot(
            "
            pub @main:
                i32 a1 = a1 + 100
                a1 = 8
                i32 a2 = a2 + 0
                i32 a0 = a0 + 1
                jump @main if a0 <u 10
                ret
            ",
            "
            @0 [export #0: 'main']
                a1 = 0x8
                a0 = a0 + 0x1
                jump @0 if a0 <u 10
            @1
                ret
            ",
            expect_finished,
            expect_regs([(Reg::A0, 10), (Reg::A1, 8)]),
        )
    }
}
6914
6915fn collect_used_blocks(all_blocks: &[BasicBlock<AnyTarget, BlockTarget>], reachability_graph: &ReachabilityGraph) -> Vec<BlockTarget> {
6916    let mut used_blocks = Vec::new();
6917    for block in all_blocks {
6918        if !reachability_graph.is_code_reachable(block.target) {
6919            continue;
6920        }
6921
6922        used_blocks.push(block.target);
6923    }
6924
6925    used_blocks
6926}
6927
/// Inserts trampoline blocks so that every used block whose terminator has a
/// fallthrough target actually falls through to a valid place in the final
/// block order.
///
/// For each block which `calculate_whether_can_fallthrough` did not mark as
/// able to fall through to its successor, a new block is appended right after
/// it which jumps to the intended fallthrough target — or, when that target
/// is an empty block with no fallthrough of its own, a clone of the target's
/// terminator is inlined instead of the jump. Returns the new block ordering
/// with the trampolines inserted.
fn add_missing_fallthrough_blocks(
    all_blocks: &mut Vec<BasicBlock<AnyTarget, BlockTarget>>,
    reachability_graph: &mut ReachabilityGraph,
    used_blocks: Vec<BlockTarget>,
) -> Vec<BlockTarget> {
    let mut new_used_blocks = Vec::new();
    let can_fallthrough_to_next_block = calculate_whether_can_fallthrough(all_blocks, &used_blocks);
    for current in used_blocks {
        new_used_blocks.push(current);
        if can_fallthrough_to_next_block.contains(&current) {
            continue;
        }

        // Terminators without a fallthrough target need no trampoline.
        let Some(target) = all_blocks[current.index()].next.instruction.fallthrough_target_mut().copied() else {
            continue;
        };

        // If the target is a distinct, empty block whose own terminator has no
        // fallthrough, its terminator can be copied directly into the new block.
        let inline_target = target != current
            && all_blocks[target.index()].ops.is_empty()
            && all_blocks[target.index()].next.instruction.fallthrough_target_mut().is_none();

        let new_block_index = BlockTarget::from_raw(all_blocks.len());
        all_blocks.push(BasicBlock {
            target: new_block_index,
            source: all_blocks[current.index()].source,
            ops: Default::default(),
            next: if inline_target {
                all_blocks[target.index()].next.clone()
            } else {
                EndOfBlock {
                    source: all_blocks[current.index()].next.source.clone(),
                    instruction: ControlInst::Jump { target },
                }
            },
            is_function: false,
            is_unlikely: false,
        });

        new_used_blocks.push(new_block_index);

        // Temporarily flag the new block as always reachable while its
        // outgoing references are registered — presumably so it isn't treated
        // as dead during the update; the flag is cleared immediately after.
        reachability_graph
            .for_code
            .entry(new_block_index)
            .or_insert(Reachability::default())
            .always_reachable = true;
        update_references(all_blocks, reachability_graph, None, new_block_index, Default::default());
        reachability_graph.for_code.get_mut(&new_block_index).unwrap().always_reachable = false;

        // Redirect the original block's fallthrough to the new trampoline and
        // refresh the reachability bookkeeping for it.
        let references = gather_references(&all_blocks[current.index()]);
        *all_blocks[current.index()].next.instruction.fallthrough_target_mut().unwrap() = new_block_index;
        update_references(all_blocks, reachability_graph, None, current, references);
    }

    new_used_blocks
}
6983
/// Merges empty basic blocks which unconditionally jump to the block laid out
/// immediately after them into that next block.
///
/// For every such block, all code and data references to it are redirected to
/// its successor; the block is then dropped from the reachability graph (once
/// nothing references it anymore) and removed from `used_blocks` in place.
fn merge_consecutive_fallthrough_blocks(
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    reachability_graph: &mut ReachabilityGraph,
    section_to_block: &mut HashMap<SectionTarget, BlockTarget>,
    used_blocks: &mut Vec<BlockTarget>,
) {
    // With fewer than two blocks there's no consecutive pair to merge.
    if used_blocks.len() < 2 {
        return;
    }

    let mut removed = HashSet::new();
    for nth_block in 0..used_blocks.len() - 1 {
        let current = used_blocks[nth_block];
        let next = used_blocks[nth_block + 1];

        // Find blocks which are empty...
        if !all_blocks[current.index()].ops.is_empty() {
            continue;
        }

        // ...and which immediately jump somewhere else.
        {
            let ControlInst::Jump { target } = all_blocks[current.index()].next.instruction else {
                continue;
            };
            if target != next {
                continue;
            }
        }

        // Blocks which must stay addressable (always reachable or exported)
        // cannot be merged away.
        let current_reachability = reachability_graph.for_code.get_mut(&current).unwrap();
        if current_reachability.always_reachable_or_exported() {
            continue;
        }

        removed.insert(current);

        // Gather all other basic blocks which reference this block.
        let referenced_by_code: BTreeSet<BlockTarget> = current_reachability
            .reachable_from
            .iter()
            .copied()
            .chain(current_reachability.address_taken_in.iter().copied())
            .collect();

        // Replace code references to this block.
        for dep in referenced_by_code {
            let references = gather_references(&all_blocks[dep.index()]);
            for (_, op) in &mut all_blocks[dep.index()].ops {
                *op = op
                    .clone()
                    .map_target(|target| {
                        Ok::<_, ()>(if target == AnyTarget::Code(current) {
                            AnyTarget::Code(next)
                        } else {
                            target
                        })
                    })
                    .unwrap();
            }

            all_blocks[dep.index()].next.instruction = all_blocks[dep.index()]
                .next
                .instruction
                .map_target(|target| Ok::<_, ()>(if target == current { next } else { target }))
                .unwrap();

            update_references(all_blocks, reachability_graph, None, dep, references);
        }

        // Remove it from the graph if it's globally unreachable now.
        remove_code_if_globally_unreachable(all_blocks, reachability_graph, None, current);

        // If the block is still in the graph then something besides code must
        // still be holding on to it.
        let Some(current_reachability) = reachability_graph.for_code.get_mut(&current) else {
            continue;
        };

        if !current_reachability.referenced_by_data.is_empty() {
            // Find all section targets which correspond to this block...
            let section_targets: Vec<SectionTarget> = section_to_block
                .iter()
                .filter(|&(_, block_target)| *block_target == current)
                .map(|(section_target, _)| *section_target)
                .collect();

            // ...then make them to point to the new block.
            for section_target in section_targets {
                section_to_block.insert(section_target, next);
            }

            // Grab all of the data sections which reference the current block.
            let referenced_by_data = core::mem::take(&mut current_reachability.referenced_by_data);

            // Mark the next block as referenced by all of the data sections which reference the current block.
            reachability_graph
                .for_code
                .get_mut(&next)
                .unwrap()
                .referenced_by_data
                .extend(referenced_by_data.iter().copied());

            // Mark the data sections as NOT referencing the current block, and make them reference the next block.
            for section_index in &referenced_by_data {
                if let Some(list) = reachability_graph.code_references_in_data_section.get_mut(section_index) {
                    list.retain(|&target| target != current);
                    list.push(next);
                    list.sort_unstable();
                    list.dedup();
                }
            }
        }

        remove_code_if_globally_unreachable(all_blocks, reachability_graph, None, current);
    }

    // Sanity check: every merged-away block must have become unreachable.
    for &current in &removed {
        assert!(
            !reachability_graph.is_code_reachable(current),
            "block {current:?} still reachable: {:#?}",
            reachability_graph.for_code.get(&current)
        );
    }

    used_blocks.retain(|current| !removed.contains(current));
}
7109
/// Rewrites every block which uses the "fake" registers (`E0`..`E3`) so that
/// it only uses registers natively supported by the VM.
///
/// Runs `regalloc2` over the affected instruction range of each block; values
/// which don't fit into the available real registers are spilled into the
/// dedicated `section_regspill` data section. `regspill_size` is grown to the
/// number of bytes the spill section needs.
fn spill_fake_registers(
    section_regspill: SectionIndex,
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    reachability_graph: &mut ReachabilityGraph,
    imports: &[Import],
    used_blocks: &[BlockTarget],
    regspill_size: &mut usize,
    is_rv64: bool,
) {
    // Adapter which presents a single linear run of instructions to
    // `regalloc2` as a one-block function with no successors.
    struct RegAllocBlock<'a> {
        instructions: &'a [Vec<regalloc2::Operand>],
        num_vregs: usize,
    }

    impl<'a> regalloc2::Function for RegAllocBlock<'a> {
        fn num_insts(&self) -> usize {
            self.instructions.len()
        }

        fn num_blocks(&self) -> usize {
            1
        }

        fn entry_block(&self) -> regalloc2::Block {
            regalloc2::Block(0)
        }

        fn block_insns(&self, _block: regalloc2::Block) -> regalloc2::InstRange {
            regalloc2::InstRange::forward(regalloc2::Inst(0), regalloc2::Inst(self.instructions.len() as u32))
        }

        fn block_succs(&self, _block: regalloc2::Block) -> &[regalloc2::Block] {
            &[]
        }

        fn block_preds(&self, _block: regalloc2::Block) -> &[regalloc2::Block] {
            &[]
        }

        fn block_params(&self, _block: regalloc2::Block) -> &[regalloc2::VReg] {
            &[]
        }

        // The last instruction (the synthesized epilogue) acts as the return.
        fn is_ret(&self, insn: regalloc2::Inst) -> bool {
            insn.0 as usize + 1 == self.instructions.len()
        }

        fn is_branch(&self, _insn: regalloc2::Inst) -> bool {
            false
        }

        fn branch_blockparams(&self, _block: regalloc2::Block, _insn: regalloc2::Inst, _succ_idx: usize) -> &[regalloc2::VReg] {
            unimplemented!();
        }

        fn inst_operands(&self, insn: regalloc2::Inst) -> &[regalloc2::Operand] {
            &self.instructions[insn.0 as usize]
        }

        fn inst_clobbers(&self, _insn: regalloc2::Inst) -> regalloc2::PRegSet {
            regalloc2::PRegSet::empty()
        }

        fn num_vregs(&self) -> usize {
            self.num_vregs
        }

        fn spillslot_size(&self, _regclass: regalloc2::RegClass) -> usize {
            1
        }
    }

    let fake_mask = RegMask::fake();
    for current in used_blocks {
        let block = &mut all_blocks[current.index()];
        // Find the first instruction which touches a fake register; if there
        // is none then this block needs no rewriting.
        let Some(start_at) = block
            .ops
            .iter()
            .position(|(_, instruction)| !((instruction.src_mask(imports) | instruction.dst_mask(imports)) & fake_mask).is_empty())
        else {
            continue;
        };

        // ...and the (exclusive) end of the last instruction which does.
        let end_at = {
            let mut end_at = start_at + 1;
            for index in start_at..block.ops.len() {
                let instruction = &block.ops[index].1;
                if !((instruction.src_mask(imports) | instruction.dst_mask(imports)) & fake_mask).is_empty() {
                    end_at = index + 1;
                }
            }
            end_at
        };

        // This block uses one or more "fake" registers which are not supported by the VM.
        //
        // So we have to spill those register into memory and modify the block in such a way
        // that it only uses "real" registers natively supported by the VM.
        //
        // This is not going to be particularly pretty nor very fast at run time, but it is done only as the last resort.

        let mut counter = 0;
        let mut reg_to_value_index: [usize; Reg::ALL.len()] = Default::default();
        let mut instructions = Vec::new();

        // Synthesize a prologue which defines every register, so each one has
        // a known SSA value when entering the range being rewritten.
        let mut prologue = Vec::new();
        for reg in RegMask::all() {
            let value_index = counter;
            counter += 1;
            reg_to_value_index[reg as usize] = value_index;
            prologue.push(regalloc2::Operand::new(
                regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)),
                regalloc2::OperandKind::Def,
                regalloc2::OperandPos::Late,
            ));
        }

        instructions.push(prologue);

        // Translate each instruction's register operands into regalloc2
        // operands: real registers are pinned to their physical register,
        // fake registers are left for the allocator to assign.
        for nth_instruction in start_at..end_at {
            let (_, instruction) = &block.ops[nth_instruction];
            let mut operands = Vec::new();

            for (reg, kind) in instruction.operands(imports) {
                match kind {
                    OpKind::Write => {
                        // A write creates a fresh SSA value for the register.
                        let value_index = counter;
                        counter += 1;
                        reg_to_value_index[reg as usize] = value_index;
                        operands.push(regalloc2::Operand::new(
                            regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
                            if reg.fake_register_index().is_none() {
                                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
                            } else {
                                regalloc2::OperandConstraint::Reg
                            },
                            regalloc2::OperandKind::Def,
                            regalloc2::OperandPos::Late,
                        ));
                    }
                    OpKind::Read => {
                        // A read uses the register's current SSA value.
                        let value_index = reg_to_value_index[reg as usize];
                        operands.push(regalloc2::Operand::new(
                            regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
                            if reg.fake_register_index().is_none() {
                                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
                            } else {
                                regalloc2::OperandConstraint::Reg
                            },
                            regalloc2::OperandKind::Use,
                            regalloc2::OperandPos::Early,
                        ));
                    }
                    OpKind::ReadWrite => {
                        // A read-write is modelled as a use of the old value
                        // plus a def of a new value which reuses its slot.
                        let value_index_read = reg_to_value_index[reg as usize];
                        operands.push(regalloc2::Operand::new(
                            regalloc2::VReg::new(value_index_read, regalloc2::RegClass::Int),
                            if reg.fake_register_index().is_none() {
                                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
                            } else {
                                regalloc2::OperandConstraint::Reg
                            },
                            regalloc2::OperandKind::Use,
                            regalloc2::OperandPos::Early,
                        ));

                        let value_index_write = counter;
                        counter += 1;

                        reg_to_value_index[reg as usize] = value_index_write;
                        operands.push(regalloc2::Operand::new(
                            regalloc2::VReg::new(value_index_write, regalloc2::RegClass::Int),
                            regalloc2::OperandConstraint::Reuse(operands.len() - 1),
                            regalloc2::OperandKind::Def,
                            regalloc2::OperandPos::Late,
                        ));
                    }
                }
            }

            instructions.push(operands);
        }

        // Synthesize an epilogue which uses every real (non-fake) register so
        // their final values are kept live until the end of the range.
        let mut epilogue = Vec::new();
        for reg in RegMask::all() & !RegMask::fake() {
            let value_index = reg_to_value_index[reg as usize];
            epilogue.push(regalloc2::Operand::new(
                regalloc2::VReg::new(value_index, regalloc2::RegClass::Int),
                regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)),
                regalloc2::OperandKind::Use,
                regalloc2::OperandPos::Early,
            ));
        }

        instructions.push(epilogue);

        let alloc_block = RegAllocBlock {
            instructions: &instructions,
            num_vregs: counter,
        };

        // Fake registers may only be allocated into T0-T2 (preferred)
        // or S0/S1 (non-preferred).
        let env = regalloc2::MachineEnv {
            preferred_regs_by_class: [
                [Reg::T0, Reg::T1, Reg::T2]
                    .map(|reg| regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
                    .into(),
                vec![],
                vec![],
            ],
            non_preferred_regs_by_class: [
                [Reg::S0, Reg::S1]
                    .map(|reg| regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int))
                    .into(),
                vec![],
                vec![],
            ],
            scratch_by_class: [None, None, None],
            fixed_stack_slots: vec![],
        };

        let opts = regalloc2::RegallocOptions {
            validate_ssa: true,
            ..regalloc2::RegallocOptions::default()
        };

        let output = match regalloc2::run(&alloc_block, &env, &opts) {
            Ok(output) => output,
            Err(regalloc2::RegAllocError::SSA(vreg, inst)) => {
                // Map the regalloc2 instruction index back to our instruction
                // (offset by one because of the synthesized prologue).
                let nth_instruction: isize = inst.index() as isize - 1 + start_at as isize;
                let instruction = block.ops.get(nth_instruction as usize).map(|(_, instruction)| instruction);
                panic!("internal error: register allocation failed because of invalid SSA for {vreg} for instruction {instruction:?}");
            }
            Err(error) => {
                panic!("internal error: register allocation failed: {error}")
            }
        };

        // Rebuild the instruction range, weaving in the move/spill edits
        // emitted by the allocator between the rewritten instructions.
        let mut buffer = Vec::new();
        let mut edits = output.edits.into_iter().peekable();
        for nth_instruction in start_at..=end_at {
            while let Some((next_edit_at, edit)) = edits.peek() {
                let target_nth_instruction: isize = next_edit_at.inst().index() as isize - 1 + start_at as isize;
                if target_nth_instruction < 0
                    || target_nth_instruction > nth_instruction as isize
                    || (target_nth_instruction == nth_instruction as isize && next_edit_at.pos() == regalloc2::InstPosition::After)
                {
                    break;
                }

                let target_nth_instruction = target_nth_instruction as usize;
                let regalloc2::Edit::Move { from: src, to: dst } = edit.clone();

                // Advance the iterator so that we can use `continue` later.
                edits.next();

                // Translate the edit into either a reload from the spill
                // section, a spill into it, or a plain register-to-register move.
                let reg_size = if is_rv64 { 8 } else { 4 };
                let src_reg = src.as_reg();
                let dst_reg = dst.as_reg();
                let new_instruction = match (dst_reg, src_reg) {
                    (Some(dst_reg), None) => {
                        let dst_reg = Reg::from_usize(dst_reg.hw_enc()).unwrap();
                        let src_slot = src.as_stack().unwrap();
                        let offset = src_slot.index() * reg_size;
                        *regspill_size = core::cmp::max(*regspill_size, offset + reg_size);
                        BasicInst::LoadAbsolute {
                            kind: if is_rv64 { LoadKind::U64 } else { LoadKind::I32 },
                            dst: dst_reg,
                            target: SectionTarget {
                                section_index: section_regspill,
                                offset: cast(offset).to_u64(),
                            },
                        }
                    }
                    (None, Some(src_reg)) => {
                        let src_reg = Reg::from_usize(src_reg.hw_enc()).unwrap();
                        let dst_slot = dst.as_stack().unwrap();
                        let offset = dst_slot.index() * reg_size;
                        *regspill_size = core::cmp::max(*regspill_size, offset + reg_size);
                        BasicInst::StoreAbsolute {
                            kind: if is_rv64 { StoreKind::U64 } else { StoreKind::U32 },
                            src: src_reg.into(),
                            target: SectionTarget {
                                section_index: section_regspill,
                                offset: cast(offset).to_u64(),
                            },
                        }
                    }
                    (Some(dst_reg), Some(src_reg)) => {
                        let dst_reg = Reg::from_usize(dst_reg.hw_enc()).unwrap();
                        let src_reg = Reg::from_usize(src_reg.hw_enc()).unwrap();
                        if src_reg == dst_reg {
                            continue;
                        }

                        BasicInst::MoveReg {
                            dst: dst_reg,
                            src: src_reg,
                        }
                    }
                    // Won't be emitted according to `regalloc2` docs.
                    (None, None) => unreachable!(),
                };

                log::trace!("Injected:\n     {new_instruction:?}");

                let source = block.ops.get(target_nth_instruction).or(block.ops.last()).unwrap().0.clone();
                buffer.push((source, new_instruction));
            }

            if nth_instruction == end_at {
                assert!(edits.next().is_none());
                break;
            }

            // Rewrite the original instruction, substituting each register
            // with the one the allocator actually assigned.
            let (source, instruction) = &block.ops[nth_instruction];
            let mut alloc_index = output.inst_alloc_offsets[nth_instruction - start_at + 1];
            let new_instruction = instruction
                .clone()
                .map_register(|reg, _| {
                    let alloc = &output.allocs[alloc_index as usize];
                    alloc_index += 1;

                    assert_eq!(alloc.kind(), regalloc2::AllocationKind::Reg);
                    let allocated_reg = Reg::from_usize(alloc.as_reg().unwrap().hw_enc() as usize).unwrap();
                    if reg.fake_register_index().is_none() {
                        // Real registers must have been pinned to themselves.
                        assert_eq!(reg, allocated_reg);
                    } else {
                        // Fake registers must have been mapped to a real one.
                        assert_ne!(reg, allocated_reg);
                        assert!(allocated_reg.fake_register_index().is_none());
                    }

                    allocated_reg
                })
                .unwrap_or(instruction.clone());

            if *instruction == new_instruction {
                log::trace!("Unmodified:\n     {instruction:?}");
            } else {
                log::trace!("Replaced:\n     {instruction:?}\n  -> {new_instruction:?}");
            }

            buffer.push((source.clone(), new_instruction));
        }

        assert!(edits.next().is_none());

        // The spill section is now referenced by this block; record that
        // in the reachability graph so it's kept alive.
        reachability_graph
            .for_data
            .entry(section_regspill)
            .or_default()
            .address_taken_in
            .insert(*current);

        block.ops.splice(start_at..end_at, buffer);
    }

    // Sanity check: no fake registers may remain anywhere.
    for current in used_blocks {
        if all_blocks[current.index()]
            .ops
            .iter()
            .any(|(_, instruction)| !((instruction.src_mask(imports) | instruction.dst_mask(imports)) & fake_mask).is_empty())
        {
            panic!("internal error: not all fake registers were removed")
        }
    }
}
7477
/// Replaces immediate operands with registers whenever some register is
/// already known to hold the same value.
///
/// Tracks, within each basic block, which registers currently hold which
/// immediate (put there by `LoadImmediate`/`LoadImmediate64`) and rewrites
/// matching `RegImm::Imm` operands into `RegImm::Reg`.
#[deny(clippy::as_conversions)]
fn replace_immediates_with_registers(
    all_blocks: &mut [BasicBlock<AnyTarget, BlockTarget>],
    imports: &[Import],
    used_blocks: &[BlockTarget],
) {
    // Maps an immediate value to the set of registers currently holding it.
    let mut imm_to_reg: HashMap<i64, RegMask> = HashMap::new();
    for block_target in used_blocks {
        // Reverse map: which immediate (if any) each register currently holds.
        let mut reg_to_imm: [Option<i64>; Reg::ALL.len()] = [None; Reg::ALL.len()];
        imm_to_reg.clear();

        // If there already exists a register which contains a given immediate value
        // then there's no point in duplicating it here again; just use that register.
        macro_rules! replace {
            ($src:ident) => {
                if let RegImm::Imm(imm) = $src {
                    let imm = cast(*imm).to_i64_sign_extend();
                    // Zero immediates are deliberately never replaced.
                    if imm != 0 {
                        let mask = imm_to_reg.get(&imm).copied().unwrap_or(RegMask::empty());
                        if let Some(reg) = mask.into_iter().next() {
                            *$src = RegImm::Reg(reg);
                        }
                    }
                }
            };
        }

        for (_, op) in &mut all_blocks[block_target.index()].ops {
            match op {
                BasicInst::LoadImmediate { dst, imm } => {
                    // The destination register no longer holds whatever
                    // immediate it held before.
                    if let Some(old_imm) = reg_to_imm[dst.to_usize()].take() {
                        imm_to_reg.get_mut(&old_imm).unwrap().remove(*dst);
                    }

                    let imm = cast(*imm).to_i64_sign_extend();
                    imm_to_reg.entry(imm).or_insert(RegMask::empty()).insert(*dst);
                    reg_to_imm[dst.to_usize()] = Some(imm);
                    continue;
                }
                BasicInst::LoadImmediate64 { dst, imm } => {
                    if let Some(old_imm) = reg_to_imm[dst.to_usize()].take() {
                        imm_to_reg.get_mut(&old_imm).unwrap().remove(*dst);
                    }

                    imm_to_reg.entry(*imm).or_insert(RegMask::empty()).insert(*dst);
                    reg_to_imm[dst.to_usize()] = Some(*imm);
                    continue;
                }
                BasicInst::AnyAny {
                    kind,
                    ref mut src1,
                    ref mut src2,
                    ..
                } => {
                    replace!(src1);
                    // NOTE: for shift/rotate kinds `src2` (the shift amount)
                    // is deliberately kept as an immediate.
                    if !matches!(
                        kind,
                        AnyAnyKind::ShiftLogicalLeft32
                            | AnyAnyKind::ShiftLogicalRight32
                            | AnyAnyKind::ShiftArithmeticRight32
                            | AnyAnyKind::ShiftLogicalLeft64
                            | AnyAnyKind::ShiftLogicalRight64
                            | AnyAnyKind::ShiftArithmeticRight64
                            | AnyAnyKind::ShiftLogicalLeft32AndSignExtend
                            | AnyAnyKind::ShiftLogicalRight32AndSignExtend
                            | AnyAnyKind::ShiftArithmeticRight32AndSignExtend
                            | AnyAnyKind::RotateRight32
                            | AnyAnyKind::RotateRight32AndSignExtend
                            | AnyAnyKind::RotateRight64
                    ) {
                        replace!(src2);
                    }
                }
                BasicInst::StoreAbsolute { src, .. } => {
                    replace!(src);
                }
                BasicInst::StoreIndirect { src, .. } => {
                    replace!(src);
                }
                BasicInst::Cmov { src, .. } => {
                    replace!(src);
                }
                _ => {}
            }

            // Any register written by this instruction no longer holds its
            // previously tracked immediate.
            for reg in op.dst_mask(imports) {
                if let Some(imm) = reg_to_imm[reg.to_usize()].take() {
                    imm_to_reg.get_mut(&imm).unwrap().remove(reg);
                }
            }
        }

        // The terminating branch can also have its operands replaced.
        if let ControlInst::Branch {
            ref mut src1,
            ref mut src2,
            ..
        } = all_blocks[block_target.index()].next.instruction
        {
            replace!(src1);
            replace!(src2);
        }
    }
}
7581
7582fn harvest_all_jump_targets(
7583    elf: &Elf,
7584    data_sections_set: &HashSet<SectionIndex>,
7585    code_sections_set: &HashSet<SectionIndex>,
7586    instructions: &[(Source, InstExt<SectionTarget, SectionTarget>)],
7587    relocations: &BTreeMap<SectionTarget, RelocationKind>,
7588    exports: &[Export],
7589) -> Result<HashSet<SectionTarget>, ProgramFromElfError> {
7590    let mut all_jump_targets = HashSet::new();
7591    for (_, instruction) in instructions {
7592        match instruction {
7593            InstExt::Basic(instruction) => {
7594                let (data_target, code_or_data_target) = instruction.target();
7595                if let Some(target) = data_target {
7596                    if !data_sections_set.contains(&target.section_index) {
7597                        return Err(ProgramFromElfError::other(
7598                            "found basic instruction which refers to a non-data section",
7599                        ));
7600                    }
7601                }
7602
7603                if let Some(target) = code_or_data_target {
7604                    if code_sections_set.contains(&target.section_index) {
7605                        if all_jump_targets.insert(target) {
7606                            log::trace!("Adding jump target: {target} (referenced indirectly by code)");
7607                        }
7608                    } else if !data_sections_set.contains(&target.section_index) {
7609                        return Err(ProgramFromElfError::other(
7610                            "found basic instruction which refers to neither a data nor a text section",
7611                        ));
7612                    }
7613                }
7614            }
7615            InstExt::Control(instruction) => {
7616                for target in instruction.targets().into_iter().flatten() {
7617                    if !code_sections_set.contains(&target.section_index) {
7618                        return Err(ProgramFromElfError::other(
7619                            "found control instruction which refers to a non-text section",
7620                        ));
7621                    }
7622
7623                    if all_jump_targets.insert(*target) {
7624                        log::trace!("Adding jump target: {target} (referenced by a control instruction)");
7625                    }
7626                }
7627            }
7628        }
7629    }
7630
7631    for (source_location, relocation) in relocations {
7632        if !data_sections_set.contains(&source_location.section_index) {
7633            continue;
7634        }
7635
7636        for target in relocation.targets().into_iter().flatten() {
7637            #[allow(clippy::collapsible_if)]
7638            if code_sections_set.contains(&target.section_index) {
7639                if all_jump_targets.insert(target) {
7640                    log::trace!(
7641                        "Adding jump target: {target} (referenced by relocation from {source_location} in '{}')",
7642                        elf.section_by_index(source_location.section_index).name()
7643                    );
7644                }
7645            }
7646        }
7647    }
7648
7649    for export in exports {
7650        let target = export.location;
7651        if !code_sections_set.contains(&target.section_index) {
7652            return Err(ProgramFromElfError::other("export points to a non-code section"));
7653        }
7654
7655        if all_jump_targets.insert(target) {
7656            log::trace!("Adding jump target: {target} (referenced by export)");
7657        }
7658    }
7659
7660    Ok(all_jump_targets)
7661}
7662
/// A FIFO work queue paired with a set used for deduplication.
struct VecSet<T> {
    vec: VecDeque<T>,
    set: HashSet<T>,
}

impl<T> VecSet<T> {
    /// Creates an empty queue.
    fn new() -> Self {
        VecSet {
            vec: VecDeque::new(),
            set: HashSet::new(),
        }
    }

    /// Pops the next value from the queue. The value stays registered in the
    /// deduplication set, so pushing it again later will have no effect.
    fn pop_unique(&mut self) -> Option<T> {
        self.vec.pop_front()
    }

    /// Pops the next value and also forgets it from the deduplication set,
    /// allowing the same value to be pushed (and processed) again.
    fn pop_non_unique(&mut self) -> Option<T>
    where
        T: core::hash::Hash + Eq,
    {
        // Popping from the front instead of the back cuts down on the time
        // the optimizer takes for the Westend runtime from ~53s down to ~2.6s
        self.vec.pop_front().map(|value| {
            self.set.remove(&value);
            value
        })
    }

    /// Enqueues `value`, unless it was already seen.
    fn push(&mut self, value: T)
    where
        T: core::hash::Hash + Eq + Clone,
    {
        let first_time_seen = self.set.insert(value.clone());
        if first_time_seen {
            self.vec.push_back(value);
        }
    }

    /// Returns whether the queue has no pending values.
    fn is_empty(&self) -> bool {
        self.vec.is_empty()
    }

    /// Consumes the queue, returning the set of values it has tracked.
    fn into_set(self) -> HashSet<T> {
        self.set
    }
}
7708
/// Tracks why each basic block and each data section is reachable.
#[derive(PartialEq, Eq, Debug, Default)]
struct ReachabilityGraph {
    /// Reachability information for every live basic block.
    for_code: BTreeMap<BlockTarget, Reachability>,
    /// Reachability information for every live data section.
    for_data: BTreeMap<SectionIndex, Reachability>,
    /// For each data section: the code blocks its contents reference.
    code_references_in_data_section: BTreeMap<SectionIndex, Vec<BlockTarget>>,
    /// For each data section: the other data sections its contents reference.
    data_references_in_data_section: BTreeMap<SectionIndex, Vec<SectionIndex>>,
}
7716
7717impl ReachabilityGraph {
7718    fn is_code_reachable(&self, block_target: BlockTarget) -> bool {
7719        if let Some(reachability) = self.for_code.get(&block_target) {
7720            assert!(
7721                !reachability.is_unreachable(),
7722                "Block {block_target:?} is unreachable and yet it wasn't removed from the graph!"
7723            );
7724            true
7725        } else {
7726            false
7727        }
7728    }
7729
7730    fn is_data_section_reachable(&self, section_index: SectionIndex) -> bool {
7731        if let Some(reachability) = self.for_data.get(&section_index) {
7732            assert!(!reachability.is_unreachable());
7733            true
7734        } else {
7735            false
7736        }
7737    }
7738
7739    fn mark_data_section_reachable(&mut self, section_index: SectionIndex) {
7740        self.for_data.entry(section_index).or_default().always_reachable = true;
7741    }
7742}
7743
/// The full set of reasons why a single basic block (or data section) is
/// considered reachable.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Default)]
struct Reachability {
    /// Blocks from which control flow can directly reach this one.
    reachable_from: BTreeSet<BlockTarget>,
    /// Blocks in which this block's address is taken.
    address_taken_in: BTreeSet<BlockTarget>,
    /// Data sections whose contents reference this block.
    referenced_by_data: BTreeSet<SectionIndex>,
    /// Forces this to be treated as reachable unconditionally.
    always_reachable: bool,
    /// Forces this to be treated as dynamically (indirectly) reachable.
    always_dynamically_reachable: bool,
    /// Exports pointing here. NOTE(review): presumably indices into the
    /// export list — confirm against the code which fills this in.
    exports: Vec<usize>,
}
7753
7754impl Reachability {
7755    fn is_only_statically_reachable(&self) -> bool {
7756        !self.always_reachable
7757            && !self.always_dynamically_reachable
7758            && self.referenced_by_data.is_empty()
7759            && self.address_taken_in.is_empty()
7760            && self.exports.is_empty()
7761    }
7762
7763    fn is_only_reachable_from(&self, block_target: BlockTarget) -> bool {
7764        self.is_only_statically_reachable() && self.reachable_from.len() == 1 && self.reachable_from.contains(&block_target)
7765    }
7766
7767    fn is_unreachable(&self) -> bool {
7768        self.reachable_from.is_empty()
7769            && self.address_taken_in.is_empty()
7770            && self.referenced_by_data.is_empty()
7771            && !self.always_reachable
7772            && !self.always_dynamically_reachable
7773            && self.exports.is_empty()
7774    }
7775
7776    fn is_dynamically_reachable(&self) -> bool {
7777        !self.address_taken_in.is_empty() || !self.referenced_by_data.is_empty() || self.always_dynamically_reachable
7778    }
7779
7780    fn always_reachable_or_exported(&self) -> bool {
7781        self.always_reachable || !self.exports.is_empty()
7782    }
7783}
7784
/// A single outgoing reference made by a basic block.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum ExtRef {
    /// The address of a code block is taken.
    Address(BlockTarget),
    /// Control flow transfers directly to a code block.
    Jump(BlockTarget),
    /// The address of a location inside a data section is taken.
    DataAddress(SectionIndex),
}
7791
7792fn each_reference_for_basic_instruction(instruction: &BasicInst<AnyTarget>, mut cb: impl FnMut(ExtRef)) {
7793    let (data_target, code_or_data_target) = instruction.target();
7794    if let Some(target) = data_target {
7795        cb(ExtRef::DataAddress(target.section_index));
7796    }
7797
7798    if let Some(target) = code_or_data_target {
7799        match target {
7800            AnyTarget::Code(target) => {
7801                cb(ExtRef::Address(target));
7802            }
7803            AnyTarget::Data(target) => {
7804                cb(ExtRef::DataAddress(target.section_index));
7805            }
7806        }
7807    }
7808}
7809
7810fn each_reference_for_control_instruction(instruction: &ControlInst<BlockTarget>, mut cb: impl FnMut(ExtRef)) {
7811    match *instruction {
7812        ControlInst::Jump { target } => {
7813            cb(ExtRef::Jump(target));
7814        }
7815        ControlInst::Call { target, target_return, .. } => {
7816            cb(ExtRef::Jump(target));
7817            cb(ExtRef::Address(target_return));
7818        }
7819        ControlInst::CallIndirect { target_return, .. } => {
7820            cb(ExtRef::Address(target_return));
7821        }
7822        ControlInst::Branch {
7823            target_true, target_false, ..
7824        } => {
7825            cb(ExtRef::Jump(target_true));
7826            cb(ExtRef::Jump(target_false));
7827        }
7828        ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => {}
7829    }
7830}
7831
7832fn each_reference(block: &BasicBlock<AnyTarget, BlockTarget>, mut cb: impl FnMut(ExtRef)) {
7833    for (_, instruction) in &block.ops {
7834        each_reference_for_basic_instruction(instruction, &mut cb);
7835    }
7836
7837    each_reference_for_control_instruction(&block.next.instruction, cb);
7838}
7839
7840fn calculate_reachability(
7841    section_to_block: &HashMap<SectionTarget, BlockTarget>,
7842    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
7843    data_sections_set: &HashSet<SectionIndex>,
7844    exports: &[Export],
7845    relocations: &BTreeMap<SectionTarget, RelocationKind>,
7846) -> Result<ReachabilityGraph, ProgramFromElfError> {
7847    let mut graph = ReachabilityGraph::default();
7848    let mut data_queue: VecSet<SectionTarget> = VecSet::new();
7849    let mut block_queue: VecSet<BlockTarget> = VecSet::new();
7850    let mut section_queue: VecSet<SectionIndex> = VecSet::new();
7851    let mut relocations_per_section: HashMap<SectionIndex, Vec<&RelocationKind>> = HashMap::new();
7852    for (relocation_location, relocation) in relocations.iter() {
7853        relocations_per_section
7854            .entry(relocation_location.section_index)
7855            .or_insert_with(Vec::new)
7856            .push(relocation);
7857    }
7858
7859    for (export_index, export) in exports.iter().enumerate() {
7860        let Some(&block_target) = section_to_block.get(&export.location) else {
7861            return Err(ProgramFromElfError::other("export points to a non-block"));
7862        };
7863
7864        graph.for_code.entry(block_target).or_default().exports.push(export_index);
7865        block_queue.push(block_target);
7866    }
7867
7868    while !block_queue.is_empty() || !data_queue.is_empty() {
7869        while let Some(current_block) = block_queue.pop_unique() {
7870            each_reference(&all_blocks[current_block.index()], |ext| match ext {
7871                ExtRef::Jump(target) => {
7872                    graph.for_code.entry(target).or_default().reachable_from.insert(current_block);
7873                    block_queue.push(target);
7874                }
7875                ExtRef::Address(target) => {
7876                    graph.for_code.entry(target).or_default().address_taken_in.insert(current_block);
7877                    block_queue.push(target)
7878                }
7879                ExtRef::DataAddress(target) => {
7880                    graph.for_data.entry(target).or_default().address_taken_in.insert(current_block);
7881                    section_queue.push(target)
7882                }
7883            });
7884        }
7885
7886        while let Some(target) = data_queue.pop_unique() {
7887            assert!(!section_to_block.contains_key(&target));
7888            assert!(data_sections_set.contains(&target.section_index));
7889            section_queue.push(target.section_index);
7890        }
7891
7892        while let Some(section_index) = section_queue.pop_unique() {
7893            let Some(local_relocations) = relocations_per_section.get(&section_index) else {
7894                continue;
7895            };
7896            for relocation in local_relocations {
7897                for relocation_target in relocation.targets().into_iter().flatten() {
7898                    if let Some(&block_target) = section_to_block.get(&relocation_target) {
7899                        graph
7900                            .code_references_in_data_section
7901                            .entry(section_index)
7902                            .or_default()
7903                            .push(block_target);
7904
7905                        graph
7906                            .for_code
7907                            .entry(block_target)
7908                            .or_default()
7909                            .referenced_by_data
7910                            .insert(section_index);
7911
7912                        block_queue.push(block_target);
7913                    } else {
7914                        graph
7915                            .data_references_in_data_section
7916                            .entry(section_index)
7917                            .or_default()
7918                            .push(relocation_target.section_index);
7919
7920                        graph
7921                            .for_data
7922                            .entry(relocation_target.section_index)
7923                            .or_default()
7924                            .referenced_by_data
7925                            .insert(section_index);
7926
7927                        data_queue.push(relocation_target);
7928                    }
7929                }
7930            }
7931        }
7932    }
7933
7934    for list in graph.code_references_in_data_section.values_mut() {
7935        list.sort_unstable();
7936        list.dedup();
7937    }
7938
7939    for list in graph.data_references_in_data_section.values_mut() {
7940        list.sort_unstable();
7941        list.dedup();
7942    }
7943
7944    for reachability in graph.for_code.values() {
7945        assert!(!reachability.is_unreachable());
7946    }
7947
7948    for reachability in graph.for_data.values() {
7949        assert!(!reachability.is_unreachable());
7950    }
7951
7952    assert_eq!(block_queue.set.len(), graph.for_code.len());
7953    Ok(graph)
7954}
7955
/// A set of `Reg`s stored as a bitmask; bit `n` corresponds to `Reg::ALL[n]`.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Default)]
struct RegMask(u32);
7958
7959impl core::fmt::Debug for RegMask {
7960    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
7961        fmt.write_str("(")?;
7962        let mut is_first = true;
7963        for (nth, reg) in Reg::ALL.iter().enumerate() {
7964            if self.0 & (1 << nth) != 0 {
7965                if is_first {
7966                    is_first = false;
7967                } else {
7968                    fmt.write_str("|")?;
7969                }
7970                fmt.write_str(reg.name())?;
7971            }
7972        }
7973        fmt.write_str(")")?;
7974        Ok(())
7975    }
7976}
7977
/// Iterator over the registers set in a `RegMask`; see its `IntoIterator` impl.
struct RegMaskIter {
    /// Remaining mask bits; bit 0 always corresponds to `remaining[0]`.
    mask: u32,
    /// Registers not yet visited.
    remaining: &'static [Reg],
}
7982
7983impl Iterator for RegMaskIter {
7984    type Item = Reg;
7985    fn next(&mut self) -> Option<Self::Item> {
7986        loop {
7987            let reg = *self.remaining.get(0)?;
7988            let is_set = (self.mask & 1) != 0;
7989            self.remaining = &self.remaining[1..];
7990            self.mask >>= 1;
7991
7992            if is_set {
7993                return Some(reg);
7994            }
7995        }
7996    }
7997}
7998
7999impl IntoIterator for RegMask {
8000    type Item = Reg;
8001    type IntoIter = RegMaskIter;
8002
8003    fn into_iter(self) -> Self::IntoIter {
8004        RegMaskIter {
8005            mask: self.0,
8006            remaining: &Reg::ALL,
8007        }
8008    }
8009}
8010
8011impl RegMask {
8012    fn all() -> Self {
8013        RegMask((1 << Reg::ALL.len()) - 1)
8014    }
8015
8016    fn fake() -> Self {
8017        let mut mask = RegMask(0);
8018        for reg in Reg::FAKE {
8019            mask.insert(reg);
8020        }
8021        mask
8022    }
8023
8024    fn empty() -> Self {
8025        RegMask(0)
8026    }
8027
8028    fn is_empty(self) -> bool {
8029        self == Self::empty()
8030    }
8031
8032    fn remove(&mut self, mask: impl Into<RegMask>) {
8033        *self &= !mask.into();
8034    }
8035
8036    fn insert(&mut self, mask: impl Into<RegMask>) {
8037        *self |= mask.into();
8038    }
8039
8040    fn from_regs(regs: impl IntoIterator<Item = Reg>) -> Self {
8041        let mut mask = Self::empty();
8042        for reg in regs {
8043            mask.insert(reg);
8044        }
8045
8046        mask
8047    }
8048}
8049
8050impl From<Reg> for RegMask {
8051    fn from(reg: Reg) -> Self {
8052        RegMask(1 << (reg as usize))
8053    }
8054}
8055
8056impl From<RegImm> for RegMask {
8057    fn from(rm: RegImm) -> Self {
8058        match rm {
8059            RegImm::Reg(reg) => reg.into(),
8060            RegImm::Imm(_) => Self::empty(),
8061        }
8062    }
8063}
8064
8065impl<'a> From<&'a [Reg]> for RegMask {
8066    fn from(slice: &'a [Reg]) -> RegMask {
8067        let mut mask = RegMask::empty();
8068        for &reg in slice {
8069            mask.insert(reg);
8070        }
8071        mask
8072    }
8073}
8074
8075impl<'a> From<&'a Vec<Reg>> for RegMask {
8076    fn from(slice: &'a Vec<Reg>) -> RegMask {
8077        RegMask::from(slice.as_slice())
8078    }
8079}
8080
8081impl core::ops::Not for RegMask {
8082    type Output = Self;
8083    fn not(self) -> Self {
8084        RegMask(!self.0)
8085    }
8086}
8087
8088impl core::ops::BitAnd for RegMask {
8089    type Output = Self;
8090    fn bitand(self, rhs: RegMask) -> Self {
8091        RegMask(self.0 & rhs.0)
8092    }
8093}
8094
8095impl core::ops::BitAnd<Reg> for RegMask {
8096    type Output = Self;
8097    fn bitand(self, rhs: Reg) -> Self {
8098        self & RegMask::from(rhs)
8099    }
8100}
8101
8102impl core::ops::BitAndAssign for RegMask {
8103    fn bitand_assign(&mut self, rhs: RegMask) {
8104        self.0 &= rhs.0;
8105    }
8106}
8107
8108impl core::ops::BitAndAssign<Reg> for RegMask {
8109    fn bitand_assign(&mut self, rhs: Reg) {
8110        self.bitand_assign(RegMask::from(rhs));
8111    }
8112}
8113
8114impl core::ops::BitOr for RegMask {
8115    type Output = Self;
8116    fn bitor(self, rhs: RegMask) -> Self {
8117        RegMask(self.0 | rhs.0)
8118    }
8119}
8120
8121impl core::ops::BitOr<Reg> for RegMask {
8122    type Output = Self;
8123    fn bitor(self, rhs: Reg) -> Self {
8124        self | RegMask::from(rhs)
8125    }
8126}
8127
8128impl core::ops::BitOrAssign for RegMask {
8129    fn bitor_assign(&mut self, rhs: RegMask) {
8130        self.0 |= rhs.0;
8131    }
8132}
8133
8134impl core::ops::BitOrAssign<Reg> for RegMask {
8135    fn bitor_assign(&mut self, rhs: Reg) {
8136        self.bitor_assign(RegMask::from(rhs));
8137    }
8138}
8139
#[test]
fn test_all_regs_indexes() {
    // `RegMask` and `RegMaskIter` rely on bit `n` mapping to `Reg::ALL[n]`,
    // i.e. each register's discriminant must equal its position in `Reg::ALL`.
    let mut expected_index = 0;
    for reg in Reg::ALL.iter() {
        assert_eq!(*reg as usize, expected_index);
        expected_index += 1;
    }
}
8146
/// The jump targets assigned to a single basic block by `build_jump_table`.
#[derive(Copy, Clone)]
struct JumpTarget {
    /// The block's position within the list of used blocks.
    static_target: u32,
    /// One-based jump table slot; present only if the block is dynamically reachable.
    dynamic_target: Option<u32>,
}
8152
8153fn build_jump_table(
8154    total_block_count: usize,
8155    used_blocks: &[BlockTarget],
8156    reachability_graph: &ReachabilityGraph,
8157) -> (Vec<u32>, Vec<Option<JumpTarget>>) {
8158    let mut jump_target_for_block: Vec<Option<JumpTarget>> = Vec::new();
8159    jump_target_for_block.resize(total_block_count, None);
8160
8161    let mut jump_table = Vec::new();
8162    for (static_target, current) in used_blocks.iter().enumerate() {
8163        let reachability = reachability_graph.for_code.get(current).unwrap();
8164        assert!(!reachability.is_unreachable());
8165
8166        let dynamic_target = if reachability.is_dynamically_reachable() {
8167            let dynamic_target: u32 = (jump_table.len() + 1).try_into().expect("jump table index overflow");
8168            jump_table.push(static_target.try_into().expect("jump table index overflow"));
8169            Some(dynamic_target)
8170        } else {
8171            None
8172        };
8173
8174        jump_target_for_block[current.index()] = Some(JumpTarget {
8175            static_target: static_target.try_into().expect("jump table index overflow"),
8176            dynamic_target,
8177        });
8178    }
8179
8180    (jump_table, jump_target_for_block)
8181}
8182
8183fn calculate_whether_can_fallthrough(
8184    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
8185    used_blocks: &[BlockTarget],
8186) -> HashSet<BlockTarget> {
8187    let mut can_fallthrough_to_next_block: HashSet<BlockTarget> = HashSet::new();
8188    for window in used_blocks.windows(2) {
8189        match all_blocks[window[0].index()].next.instruction {
8190            ControlInst::Jump { target }
8191            | ControlInst::Branch { target_false: target, .. }
8192            | ControlInst::Call { target_return: target, .. }
8193            | ControlInst::CallIndirect { target_return: target, .. } => {
8194                if target == window[1] {
8195                    can_fallthrough_to_next_block.insert(window[0]);
8196                }
8197            }
8198
8199            ControlInst::JumpIndirect { .. } | ControlInst::Unimplemented => {}
8200        }
8201    }
8202
8203    can_fallthrough_to_next_block
8204}
8205
8206#[allow(clippy::too_many_arguments)]
8207fn emit_code(
8208    section_to_function_name: &BTreeMap<SectionTarget, String>,
8209    imports: &[Import],
8210    base_address_for_section: &HashMap<SectionIndex, u64>,
8211    section_got: SectionIndex,
8212    target_to_got_offset: &HashMap<AnyTarget, u64>,
8213    all_blocks: &[BasicBlock<AnyTarget, BlockTarget>],
8214    used_blocks: &[BlockTarget],
8215    used_imports: &HashSet<usize>,
8216    jump_target_for_block: &[Option<JumpTarget>],
8217    is_optimized: bool,
8218    is_rv64: bool,
8219    heap_base: u32,
8220) -> Result<(Vec<(SourceStack, Instruction)>, Vec<usize>), ProgramFromElfError> {
8221    use polkavm_common::program::Reg as PReg;
8222    fn conv_reg(reg: Reg) -> polkavm_common::program::RawReg {
8223        match reg {
8224            Reg::RA => PReg::RA,
8225            Reg::SP => PReg::SP,
8226            Reg::T0 => PReg::T0,
8227            Reg::T1 => PReg::T1,
8228            Reg::T2 => PReg::T2,
8229            Reg::S0 => PReg::S0,
8230            Reg::S1 => PReg::S1,
8231            Reg::A0 => PReg::A0,
8232            Reg::A1 => PReg::A1,
8233            Reg::A2 => PReg::A2,
8234            Reg::A3 => PReg::A3,
8235            Reg::A4 => PReg::A4,
8236            Reg::A5 => PReg::A5,
8237            Reg::E0 | Reg::E1 | Reg::E2 | Reg::E3 => {
8238                unreachable!("internal error: temporary register was not spilled into memory");
8239            }
8240        }
8241        .into()
8242    }
8243
8244    let can_fallthrough_to_next_block = calculate_whether_can_fallthrough(all_blocks, used_blocks);
8245    let get_data_address = |source: &SourceStack, target: SectionTarget| -> Result<u32, ProgramFromElfError> {
8246        if let Some(&base_address) = base_address_for_section.get(&target.section_index) {
8247            let Some(address) = base_address.checked_add(target.offset) else {
8248                return Err(ProgramFromElfError::other(format!(
8249                    "address overflow when relocating instruction in {}",
8250                    source.display(section_to_function_name)
8251                )));
8252            };
8253
8254            let Ok(address) = address.try_into() else {
8255                return Err(ProgramFromElfError::other("address overflow when casting"));
8256            };
8257
8258            Ok(address)
8259        } else {
8260            Err(ProgramFromElfError::other("internal error: section with no base address"))
8261        }
8262    };
8263
8264    let get_jump_target = |target: BlockTarget| -> Result<JumpTarget, ProgramFromElfError> {
8265        let Some(jump_target) = jump_target_for_block[target.index()] else {
8266            return Err(ProgramFromElfError::other("out of range jump target"));
8267        };
8268
8269        Ok(jump_target)
8270    };
8271
8272    let mut basic_block_delimited = true;
8273    let mut code: Vec<(SourceStack, Instruction)> = Vec::new();
8274    let mut offsets = Vec::new();
8275    for block_target in used_blocks {
8276        let block = &all_blocks[block_target.index()];
8277
8278        if !basic_block_delimited {
8279            basic_block_delimited = true;
8280            code.push((
8281                Source {
8282                    section_index: block.source.section_index,
8283                    offset_range: (block.source.offset_range.start..block.source.offset_range.start + 4).into(),
8284                }
8285                .into(),
8286                Instruction::fallthrough,
8287            ));
8288        }
8289
8290        offsets.push(code.len());
8291
8292        macro_rules! codegen {
8293            (
8294                args = $args:tt,
8295                kind = $kind:expr,
8296
8297                {
8298                    $($p:pat => $inst:ident,)+
8299                }
8300            ) => {
8301                match $kind {
8302                    $(
8303                        $p => Instruction::$inst $args
8304                    ),+
8305                }
8306            }
8307        }
8308
8309        if block.is_unlikely && !(block.ops.is_empty() && matches!(block.next.instruction, ControlInst::Unimplemented)) {
8310            code.push((
8311                Source {
8312                    section_index: block.source.section_index,
8313                    offset_range: (block.source.offset_range.start..block.source.offset_range.start + 4).into(),
8314                }
8315                .into(),
8316                Instruction::unlikely,
8317            ));
8318        }
8319
8320        for (source, op) in &block.ops {
8321            let op = match *op {
8322                BasicInst::LoadImmediate { dst, imm } => Instruction::load_imm(conv_reg(dst), cast(imm).to_unsigned()),
8323                BasicInst::LoadImmediate64 { dst, imm } => {
8324                    if !is_rv64 {
8325                        unreachable!("internal error: load_imm64 found when processing 32-bit binary")
8326                    } else {
8327                        Instruction::load_imm64(conv_reg(dst), cast(imm).to_unsigned())
8328                    }
8329                }
8330                BasicInst::LoadHeapBase { dst } => Instruction::load_imm(conv_reg(dst), heap_base),
8331                BasicInst::LoadAbsolute { kind, dst, target } => {
8332                    codegen! {
8333                        args = (conv_reg(dst), get_data_address(source, target)?),
8334                        kind = kind,
8335                        {
8336                            LoadKind::I8 => load_i8,
8337                            LoadKind::I16 => load_i16,
8338                            LoadKind::I32 => load_i32,
8339                            LoadKind::U8 => load_u8,
8340                            LoadKind::U16 => load_u16,
8341                            LoadKind::U32 => load_u32,
8342                            LoadKind::U64 => load_u64,
8343                        }
8344                    }
8345                }
8346                BasicInst::StoreAbsolute { kind, src, target } => {
8347                    let target = get_data_address(source, target)?;
8348                    match src {
8349                        RegImm::Reg(src) => {
8350                            codegen! {
8351                                args = (conv_reg(src), target),
8352                                kind = kind,
8353                                {
8354                                    StoreKind::U64 => store_u64,
8355                                    StoreKind::U32 => store_u32,
8356                                    StoreKind::U16 => store_u16,
8357                                    StoreKind::U8 => store_u8,
8358                                }
8359                            }
8360                        }
8361                        RegImm::Imm(value) => {
8362                            codegen! {
8363                                args = (target, cast(value).to_unsigned()),
8364                                kind = kind,
8365                                {
8366                                    StoreKind::U64 => store_imm_u64,
8367                                    StoreKind::U32 => store_imm_u32,
8368                                    StoreKind::U16 => store_imm_u16,
8369                                    StoreKind::U8 => store_imm_u8,
8370                                }
8371                            }
8372                        }
8373                    }
8374                }
8375                BasicInst::LoadIndirect { kind, dst, base, offset } => {
8376                    codegen! {
8377                        args = (conv_reg(dst), conv_reg(base), cast(offset).to_unsigned()),
8378                        kind = kind,
8379                        {
8380                            LoadKind::I8 => load_indirect_i8,
8381                            LoadKind::I16 => load_indirect_i16,
8382                            LoadKind::I32 => load_indirect_i32,
8383                            LoadKind::U8 => load_indirect_u8,
8384                            LoadKind::U16 => load_indirect_u16,
8385                            LoadKind::U32 => load_indirect_u32,
8386                            LoadKind::U64 => load_indirect_u64,
8387                        }
8388                    }
8389                }
8390                BasicInst::StoreIndirect { kind, src, base, offset } => match src {
8391                    RegImm::Reg(src) => {
8392                        codegen! {
8393                            args = (conv_reg(src), conv_reg(base), cast(offset).to_unsigned()),
8394                            kind = kind,
8395                            {
8396                                StoreKind::U64 => store_indirect_u64,
8397                                StoreKind::U32 => store_indirect_u32,
8398                                StoreKind::U16 => store_indirect_u16,
8399                                StoreKind::U8 => store_indirect_u8,
8400                            }
8401                        }
8402                    }
8403                    RegImm::Imm(value) => {
8404                        codegen! {
8405                            args = (conv_reg(base), cast(offset).to_unsigned(), cast(value).to_unsigned()),
8406                            kind = kind,
8407                            {
8408                                StoreKind::U64 => store_imm_indirect_u64,
8409                                StoreKind::U32 => store_imm_indirect_u32,
8410                                StoreKind::U16 => store_imm_indirect_u16,
8411                                StoreKind::U8 => store_imm_indirect_u8,
8412                            }
8413                        }
8414                    }
8415                },
8416                BasicInst::LoadAddress { dst, target } => {
8417                    let value = match target {
8418                        AnyTarget::Code(target) => {
8419                            let value = get_jump_target(target)?.dynamic_target.expect("missing jump target for address");
8420                            let Some(value) = value.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
8421                                return Err(ProgramFromElfError::other("overflow when emitting an address load"));
8422                            };
8423                            value
8424                        }
8425                        AnyTarget::Data(target) => get_data_address(source, target)?,
8426                    };
8427
8428                    Instruction::load_imm(conv_reg(dst), value)
8429                }
8430                BasicInst::LoadAddressIndirect { dst, target } => {
8431                    let Some(&offset) = target_to_got_offset.get(&target) else {
8432                        return Err(ProgramFromElfError::other(
8433                            "indirect address load without a corresponding GOT entry",
8434                        ));
8435                    };
8436
8437                    let target = SectionTarget {
8438                        section_index: section_got,
8439                        offset,
8440                    };
8441
8442                    let value = get_data_address(source, target)?;
8443                    if is_rv64 {
8444                        Instruction::load_u64(conv_reg(dst), value)
8445                    } else {
8446                        Instruction::load_i32(conv_reg(dst), value)
8447                    }
8448                }
8449                BasicInst::Reg { kind, dst, src } => {
8450                    codegen! {
8451                        args = (conv_reg(dst), conv_reg(src)),
8452                        kind = kind,
8453                        {
8454                            RegKind::CountLeadingZeroBits32 => count_leading_zero_bits_32,
8455                            RegKind::CountLeadingZeroBits64 => count_leading_zero_bits_64,
8456                            RegKind::CountSetBits32 => count_set_bits_32,
8457                            RegKind::CountSetBits64 => count_set_bits_64,
8458                            RegKind::CountTrailingZeroBits32 => count_trailing_zero_bits_32,
8459                            RegKind::CountTrailingZeroBits64 => count_trailing_zero_bits_64,
8460                            RegKind::ReverseByte => reverse_byte,
8461                            RegKind::SignExtend8 => sign_extend_8,
8462                            RegKind::SignExtend16 => sign_extend_16,
8463                            RegKind::ZeroExtend16 => zero_extend_16,
8464                        }
8465                    }
8466                }
8467                BasicInst::RegReg { kind, dst, src1, src2 } => {
8468                    use RegRegKind as K;
8469                    codegen! {
8470                        args = (conv_reg(dst), conv_reg(src1), conv_reg(src2)),
8471                        kind = kind,
8472                        {
8473                            K::MulUpperSignedSigned32 => mul_upper_signed_signed,
8474                            K::MulUpperSignedSigned64 => mul_upper_signed_signed,
8475                            K::MulUpperUnsignedUnsigned32 => mul_upper_unsigned_unsigned,
8476                            K::MulUpperUnsignedUnsigned64 => mul_upper_unsigned_unsigned,
8477                            K::MulUpperSignedUnsigned32 => mul_upper_signed_unsigned,
8478                            K::MulUpperSignedUnsigned64 => mul_upper_signed_unsigned,
8479                            K::Div32 => div_signed_32,
8480                            K::Div32AndSignExtend => div_signed_32,
8481                            K::Div64 => div_signed_64,
8482                            K::DivUnsigned32 => div_unsigned_32,
8483                            K::DivUnsigned32AndSignExtend => div_unsigned_32,
8484                            K::DivUnsigned64 => div_unsigned_64,
8485                            K::Rem32 => rem_signed_32,
8486                            K::Rem32AndSignExtend => rem_signed_32,
8487                            K::Rem64 => rem_signed_64,
8488                            K::RemUnsigned32 => rem_unsigned_32,
8489                            K::RemUnsigned32AndSignExtend => rem_unsigned_32,
8490                            K::RemUnsigned64 => rem_unsigned_64,
8491                            K::AndInverted => and_inverted,
8492                            K::OrInverted => or_inverted,
8493                            K::Xnor => xnor,
8494                            K::Maximum => maximum,
8495                            K::MaximumUnsigned => maximum_unsigned,
8496                            K::Minimum => minimum,
8497                            K::MinimumUnsigned => minimum_unsigned,
8498                            K::RotateLeft32 => rotate_left_32,
8499                            K::RotateLeft32AndSignExtend => rotate_left_32,
8500                            K::RotateLeft64 => rotate_left_64,
8501                        }
8502                    }
8503                }
8504                BasicInst::MoveReg { dst, src } => Instruction::move_reg(conv_reg(dst), conv_reg(src)),
8505                BasicInst::AnyAny { kind, dst, src1, src2 } => {
8506                    use AnyAnyKind as K;
8507                    use Instruction as I;
8508                    let dst = conv_reg(dst);
8509                    match (src1, src2) {
8510                        (RegImm::Reg(src1), RegImm::Reg(src2)) => {
8511                            codegen! {
8512                                args = (dst, conv_reg(src1), conv_reg(src2)),
8513                                kind = kind,
8514                                {
8515                                    K::Add32 => add_32,
8516                                    K::Add32AndSignExtend => add_32,
8517                                    K::Add64 => add_64,
8518                                    K::Sub32 => sub_32,
8519                                    K::Sub32AndSignExtend => sub_32,
8520                                    K::Sub64 => sub_64,
8521                                    K::ShiftLogicalLeft32 => shift_logical_left_32,
8522                                    K::ShiftLogicalLeft32AndSignExtend => shift_logical_left_32,
8523                                    K::ShiftLogicalLeft64 => shift_logical_left_64,
8524                                    K::SetLessThanSigned32 => set_less_than_signed,
8525                                    K::SetLessThanSigned64 => set_less_than_signed,
8526                                    K::SetLessThanUnsigned32 => set_less_than_unsigned,
8527                                    K::SetLessThanUnsigned64 => set_less_than_unsigned,
8528                                    K::Xor32 => xor,
8529                                    K::Xor64 => xor,
8530                                    K::ShiftLogicalRight32 => shift_logical_right_32,
8531                                    K::ShiftLogicalRight32AndSignExtend => shift_logical_right_32,
8532                                    K::ShiftLogicalRight64 => shift_logical_right_64,
8533                                    K::ShiftArithmeticRight32 => shift_arithmetic_right_32,
8534                                    K::ShiftArithmeticRight32AndSignExtend => shift_arithmetic_right_32,
8535                                    K::ShiftArithmeticRight64 => shift_arithmetic_right_64,
8536                                    K::Or32 => or,
8537                                    K::Or64 => or,
8538                                    K::And32 => and,
8539                                    K::And64 => and,
8540                                    K::Mul32 => mul_32,
8541                                    K::Mul32AndSignExtend => mul_32,
8542                                    K::Mul64 => mul_64,
8543                                    K::RotateRight32 => rotate_right_32,
8544                                    K::RotateRight32AndSignExtend => rotate_right_32,
8545                                    K::RotateRight64 => rotate_right_64,
8546                                }
8547                            }
8548                        }
8549                        (RegImm::Reg(src1), RegImm::Imm(src2)) => {
8550                            let src1 = conv_reg(src1);
8551                            let src2 = cast(src2).to_unsigned();
8552                            match kind {
8553                                K::Add32 => I::add_imm_32(dst, src1, src2),
8554                                K::Add32AndSignExtend => I::add_imm_32(dst, src1, src2),
8555                                K::Add64 => I::add_imm_64(dst, src1, src2),
8556                                K::Sub32 => I::add_imm_32(dst, src1, cast(-cast(src2).to_signed()).to_unsigned()),
8557                                K::Sub32AndSignExtend => I::add_imm_32(dst, src1, cast(-cast(src2).to_signed()).to_unsigned()),
8558                                K::Sub64 => I::add_imm_64(dst, src1, cast(-cast(src2).to_signed()).to_unsigned()),
8559                                K::ShiftLogicalLeft32 => I::shift_logical_left_imm_32(dst, src1, src2),
8560                                K::ShiftLogicalLeft32AndSignExtend => I::shift_logical_left_imm_32(dst, src1, src2),
8561                                K::ShiftLogicalLeft64 => I::shift_logical_left_imm_64(dst, src1, src2),
8562                                K::SetLessThanSigned32 => I::set_less_than_signed_imm(dst, src1, src2),
8563                                K::SetLessThanSigned64 => I::set_less_than_signed_imm(dst, src1, src2),
8564                                K::SetLessThanUnsigned32 => I::set_less_than_unsigned_imm(dst, src1, src2),
8565                                K::SetLessThanUnsigned64 => I::set_less_than_unsigned_imm(dst, src1, src2),
8566                                K::Xor32 | K::Xor64 => I::xor_imm(dst, src1, src2),
8567                                K::ShiftLogicalRight32 => I::shift_logical_right_imm_32(dst, src1, src2),
8568                                K::ShiftLogicalRight32AndSignExtend => I::shift_logical_right_imm_32(dst, src1, src2),
8569                                K::ShiftLogicalRight64 => I::shift_logical_right_imm_64(dst, src1, src2),
8570                                K::ShiftArithmeticRight32 => I::shift_arithmetic_right_imm_32(dst, src1, src2),
8571                                K::ShiftArithmeticRight32AndSignExtend => I::shift_arithmetic_right_imm_32(dst, src1, src2),
8572                                K::ShiftArithmeticRight64 => I::shift_arithmetic_right_imm_64(dst, src1, src2),
8573                                K::Or32 | K::Or64 => I::or_imm(dst, src1, src2),
8574                                K::And32 | K::And64 => I::and_imm(dst, src1, src2),
8575                                K::Mul32 => I::mul_imm_32(dst, src1, src2),
8576                                K::Mul32AndSignExtend => I::mul_imm_32(dst, src1, src2),
8577                                K::Mul64 => I::mul_imm_64(dst, src1, src2),
8578                                K::RotateRight32 => I::rotate_right_imm_32(dst, src1, src2),
8579                                K::RotateRight32AndSignExtend => I::rotate_right_imm_32(dst, src1, src2),
8580                                K::RotateRight64 => I::rotate_right_imm_64(dst, src1, src2),
8581                            }
8582                        }
8583                        (RegImm::Imm(src1), RegImm::Reg(src2)) => {
8584                            let src1 = cast(src1).to_unsigned();
8585                            let src2 = conv_reg(src2);
8586                            match kind {
8587                                K::Add32 => I::add_imm_32(dst, src2, src1),
8588                                K::Add32AndSignExtend => I::add_imm_32(dst, src2, src1),
8589                                K::Add64 => I::add_imm_64(dst, src2, src1),
8590                                K::Xor32 | K::Xor64 => I::xor_imm(dst, src2, src1),
8591                                K::Or32 | K::Or64 => I::or_imm(dst, src2, src1),
8592                                K::And32 | K::And64 => I::and_imm(dst, src2, src1),
8593                                K::Mul32 => I::mul_imm_32(dst, src2, src1),
8594                                K::Mul32AndSignExtend => I::mul_imm_32(dst, src2, src1),
8595                                K::Mul64 => I::mul_imm_64(dst, src2, src1),
8596
8597                                K::Sub32 => I::negate_and_add_imm_32(dst, src2, src1),
8598                                K::Sub32AndSignExtend => I::negate_and_add_imm_32(dst, src2, src1),
8599                                K::Sub64 => I::negate_and_add_imm_64(dst, src2, src1),
8600                                K::ShiftLogicalLeft32 => I::shift_logical_left_imm_alt_32(dst, src2, src1),
8601                                K::ShiftLogicalLeft32AndSignExtend => I::shift_logical_left_imm_alt_32(dst, src2, src1),
8602                                K::ShiftLogicalLeft64 => I::shift_logical_left_imm_alt_64(dst, src2, src1),
8603                                K::SetLessThanSigned32 => I::set_greater_than_signed_imm(dst, src2, src1),
8604                                K::SetLessThanSigned64 => I::set_greater_than_signed_imm(dst, src2, src1),
8605                                K::SetLessThanUnsigned32 => I::set_greater_than_unsigned_imm(dst, src2, src1),
8606                                K::SetLessThanUnsigned64 => I::set_greater_than_unsigned_imm(dst, src2, src1),
8607                                K::ShiftLogicalRight32 => I::shift_logical_right_imm_alt_32(dst, src2, src1),
8608                                K::ShiftLogicalRight32AndSignExtend => I::shift_logical_right_imm_alt_32(dst, src2, src1),
8609                                K::ShiftLogicalRight64 => I::shift_logical_right_imm_alt_64(dst, src2, src1),
8610                                K::ShiftArithmeticRight32 => I::shift_arithmetic_right_imm_alt_32(dst, src2, src1),
8611                                K::ShiftArithmeticRight32AndSignExtend => I::shift_arithmetic_right_imm_alt_32(dst, src2, src1),
8612                                K::ShiftArithmeticRight64 => I::shift_arithmetic_right_imm_alt_64(dst, src2, src1),
8613
8614                                K::RotateRight32 => I::rotate_right_imm_alt_32(dst, src2, src1),
8615                                K::RotateRight32AndSignExtend => I::rotate_right_imm_alt_32(dst, src2, src1),
8616                                K::RotateRight64 => I::rotate_right_imm_alt_64(dst, src2, src1),
8617                            }
8618                        }
8619                        (RegImm::Imm(src1), RegImm::Imm(src2)) => {
8620                            if is_optimized {
8621                                unreachable!("internal error: instruction with only constant operands: {op:?}")
8622                            } else {
8623                                let imm: u32 = OperationKind::from(kind)
8624                                    .apply_const(cast(src1).to_i64_sign_extend(), cast(src2).to_i64_sign_extend())
8625                                    .try_into()
8626                                    .expect("load immediate overflow");
8627                                I::load_imm(dst, imm)
8628                            }
8629                        }
8630                    }
8631                }
8632                BasicInst::Cmov { kind, dst, src, cond } => match src {
8633                    RegImm::Reg(src) => {
8634                        codegen! {
8635                            args = (conv_reg(dst), conv_reg(src), conv_reg(cond)),
8636                            kind = kind,
8637                            {
8638                                CmovKind::EqZero => cmov_if_zero,
8639                                CmovKind::NotEqZero => cmov_if_not_zero,
8640                            }
8641                        }
8642                    }
8643                    RegImm::Imm(imm) => {
8644                        codegen! {
8645                            args = (conv_reg(dst), conv_reg(cond), cast(imm).to_unsigned()),
8646                            kind = kind,
8647                            {
8648                                CmovKind::EqZero => cmov_if_zero_imm,
8649                                CmovKind::NotEqZero => cmov_if_not_zero_imm,
8650                            }
8651                        }
8652                    }
8653                },
8654                BasicInst::Ecalli { nth_import } => {
8655                    assert!(used_imports.contains(&nth_import));
8656                    let import = &imports[nth_import];
8657                    Instruction::ecalli(import.metadata.index.expect("internal error: no index was assigned to an ecall"))
8658                }
8659                BasicInst::Sbrk { dst, size } => Instruction::sbrk(conv_reg(dst), conv_reg(size)),
8660                BasicInst::Memset => Instruction::memset,
8661                BasicInst::Nop => unreachable!("internal error: a nop instruction was not removed"),
8662                BasicInst::Prologue { .. } => unreachable!("internal error: a prologue instruction was not removed"),
8663                BasicInst::Epilogue { .. } => unreachable!("internal error: an epilogue instruction was not removed"),
8664            };
8665
8666            code.push((source.clone(), op));
8667        }
8668
8669        fn unconditional_jump(target: JumpTarget) -> Instruction {
8670            Instruction::jump(target.static_target)
8671        }
8672
8673        match block.next.instruction {
8674            ControlInst::Jump { target } => {
8675                let target = get_jump_target(target)?;
8676                if can_fallthrough_to_next_block.contains(block_target) {
8677                    assert!(basic_block_delimited);
8678                    basic_block_delimited = false;
8679                } else {
8680                    code.push((block.next.source.clone(), unconditional_jump(target)));
8681                }
8682            }
8683            ControlInst::Call { ra, target, target_return } => {
8684                assert!(can_fallthrough_to_next_block.contains(block_target));
8685
8686                let target = get_jump_target(target)?;
8687                let target_return = get_jump_target(target_return)?
8688                    .dynamic_target
8689                    .expect("missing jump target for address");
8690                let Some(target_return) = target_return.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
8691                    return Err(ProgramFromElfError::other("overflow when emitting an indirect call"));
8692                };
8693
8694                code.push((
8695                    block.next.source.clone(),
8696                    Instruction::load_imm_and_jump(conv_reg(ra), target_return, target.static_target),
8697                ));
8698            }
8699            ControlInst::JumpIndirect { base, offset } => {
8700                if offset != 0 {
8701                    return Err(ProgramFromElfError::other(
8702                        "found an indirect jump with a non-zero offset - this would most likely result in a broken program; aborting",
8703                    ));
8704                }
8705
8706                let Ok(offset) = offset.try_into() else {
8707                    unreachable!("internal error: indirect jump with an out-of-range offset");
8708                };
8709
8710                code.push((block.next.source.clone(), Instruction::jump_indirect(conv_reg(base), offset)));
8711            }
8712            ControlInst::CallIndirect {
8713                ra,
8714                base,
8715                offset,
8716                target_return,
8717            } => {
8718                if offset != 0 {
8719                    return Err(ProgramFromElfError::other(
8720                        "found an indirect call with a non-zero offset - this would most likely result in a broken program; aborting",
8721                    ));
8722                }
8723
8724                assert!(can_fallthrough_to_next_block.contains(block_target));
8725
8726                let target_return = get_jump_target(target_return)?
8727                    .dynamic_target
8728                    .expect("missing jump target for address");
8729                let Some(target_return) = target_return.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
8730                    return Err(ProgramFromElfError::other("overflow when emitting an indirect call"));
8731                };
8732
8733                let Ok(offset) = offset.try_into() else {
8734                    unreachable!("internal error: indirect call with an out-of-range offset");
8735                };
8736
8737                code.push((
8738                    block.next.source.clone(),
8739                    Instruction::load_imm_and_jump_indirect(conv_reg(ra), conv_reg(base), target_return, offset),
8740                ));
8741            }
8742            ControlInst::Branch {
8743                kind,
8744                src1,
8745                src2,
8746                target_true,
8747                target_false,
8748            } => {
8749                assert!(can_fallthrough_to_next_block.contains(block_target));
8750
8751                let target_true = get_jump_target(target_true)?;
8752                get_jump_target(target_false)?;
8753
8754                let instruction = match (src1, src2) {
8755                    (RegImm::Reg(src1), RegImm::Reg(src2)) => {
8756                        codegen! {
8757                            args = (conv_reg(src1), conv_reg(src2), target_true.static_target),
8758                            kind = kind,
8759                            {
8760                                BranchKind::Eq32 | BranchKind::Eq64 => branch_eq,
8761                                BranchKind::NotEq32 | BranchKind::NotEq64 => branch_not_eq,
8762                                BranchKind::GreaterOrEqualUnsigned32 | BranchKind::GreaterOrEqualUnsigned64 => branch_greater_or_equal_unsigned,
8763                                BranchKind::GreaterOrEqualSigned32 | BranchKind::GreaterOrEqualSigned64 => branch_greater_or_equal_signed,
8764                                BranchKind::LessSigned32 | BranchKind::LessSigned64 => branch_less_signed,
8765                                BranchKind::LessUnsigned32 | BranchKind::LessUnsigned64 => branch_less_unsigned,
8766                            }
8767                        }
8768                    }
8769                    (RegImm::Imm(src1), RegImm::Reg(src2)) => {
8770                        codegen! {
8771                            args = (conv_reg(src2), cast(src1).to_unsigned(), target_true.static_target),
8772                            kind = kind,
8773                            {
8774                                BranchKind::Eq32 | BranchKind::Eq64 => branch_eq_imm,
8775                                BranchKind::NotEq32 | BranchKind::NotEq64 => branch_not_eq_imm,
8776                                BranchKind::GreaterOrEqualUnsigned32 | BranchKind::GreaterOrEqualUnsigned64 => branch_less_or_equal_unsigned_imm,
8777                                BranchKind::GreaterOrEqualSigned32 | BranchKind::GreaterOrEqualSigned64 => branch_less_or_equal_signed_imm,
8778                                BranchKind::LessSigned32 | BranchKind::LessSigned64 => branch_greater_signed_imm,
8779                                BranchKind::LessUnsigned32 | BranchKind::LessUnsigned64 => branch_greater_unsigned_imm,
8780                            }
8781                        }
8782                    }
8783                    (RegImm::Reg(src1), RegImm::Imm(src2)) => {
8784                        codegen! {
8785                            args = (conv_reg(src1), cast(src2).to_unsigned(), target_true.static_target),
8786                            kind = kind,
8787                            {
8788                                BranchKind::Eq32 | BranchKind::Eq64 => branch_eq_imm,
8789                                BranchKind::NotEq32 | BranchKind::NotEq64 => branch_not_eq_imm,
8790                                BranchKind::LessSigned32 | BranchKind::LessSigned64 => branch_less_signed_imm,
8791                                BranchKind::LessUnsigned32 | BranchKind::LessUnsigned64 => branch_less_unsigned_imm,
8792                                BranchKind::GreaterOrEqualSigned32 | BranchKind::GreaterOrEqualSigned64 => branch_greater_or_equal_signed_imm,
8793                                BranchKind::GreaterOrEqualUnsigned32 | BranchKind::GreaterOrEqualUnsigned64 => branch_greater_or_equal_unsigned_imm,
8794                            }
8795                        }
8796                    }
8797                    (RegImm::Imm(src1), RegImm::Imm(src2)) => {
8798                        if is_optimized {
8799                            unreachable!("internal error: branch with only constant operands")
8800                        } else {
8801                            match OperationKind::from(kind).apply_const(cast(src1).to_i64_sign_extend(), cast(src2).to_i64_sign_extend()) {
8802                                1 => unconditional_jump(target_true),
8803                                0 => {
8804                                    assert!(can_fallthrough_to_next_block.contains(block_target));
8805                                    Instruction::fallthrough
8806                                }
8807                                _ => unreachable!(),
8808                            }
8809                        }
8810                    }
8811                };
8812
8813                code.push((block.next.source.clone(), instruction));
8814            }
8815            ControlInst::Unimplemented => {
8816                code.push((block.next.source.clone(), Instruction::trap));
8817            }
8818        }
8819    }
8820
8821    Ok((code, offsets))
8822}
8823
/// The pointer width of the input ELF file (see `elf.is_64()` checks elsewhere in this file).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum Bitness {
    /// 32-bit.
    B32,
    /// 64-bit.
    B64,
}
8829
8830impl Bitness {
8831    fn bits_used_mask(self) -> u64 {
8832        match self {
8833            Bitness::B32 => u64::from(u32::MAX),
8834            Bitness::B64 => u64::MAX,
8835        }
8836    }
8837}
8838
8839impl From<Bitness> for u64 {
8840    fn from(value: Bitness) -> Self {
8841        match value {
8842            Bitness::B32 => 4,
8843            Bitness::B64 => 8,
8844        }
8845    }
8846}
8847
8848impl From<Bitness> for RelocationSize {
8849    fn from(value: Bitness) -> Self {
8850        match value {
8851            Bitness::B32 => RelocationSize::U32,
8852            Bitness::B64 => RelocationSize::U64,
8853        }
8854    }
8855}
8856
/// The width, in bytes, of the memory location a relocation patches.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) enum RelocationSize {
    /// 1 byte.
    U8,
    /// 2 bytes.
    U16,
    /// 4 bytes.
    U32,
    /// 8 bytes.
    U64,
}
8864
/// How the value of an `Offset` relocation is encoded at the patched location.
#[derive(Copy, Clone, Debug)]
pub(crate) enum SizeRelocationSize {
    /// The low six bits of a single byte (produced by a fused
    /// `R_RISCV_SET6`/`R_RISCV_SUB6` relocation pair).
    SixBits,
    /// A variable-length ULEB128 value; the existing byte length is preserved
    /// when the value is overwritten (see `overwrite_uleb128`).
    Uleb128,
    /// A fixed-width integer of the given size.
    Generic(RelocationSize),
}
8871
/// A fully classified relocation to be applied to a (data) section,
/// produced by `harvest_data_relocations`.
#[derive(Copy, Clone, Debug)]
pub(crate) enum RelocationKind {
    /// Patch in the absolute address of `target`, as a `size`-wide value.
    Abs {
        target: SectionTarget,
        size: RelocationSize,
    },
    /// A jump table entry: a fused ADD/SUB relocation pair where the added
    /// target points into a code section and the subtracted one does not.
    JumpTable {
        target_code: SectionTarget,
        target_base: SectionTarget,
    },
    /// Patch in the distance from `origin` to `target`, encoded per `size`.
    Offset {
        origin: SectionTarget,
        target: SectionTarget,
        size: SizeRelocationSize,
    },
}
8888
8889impl RelocationKind {
8890    fn targets(&self) -> [Option<SectionTarget>; 2] {
8891        match self {
8892            RelocationKind::Abs { target, .. } => [Some(*target), None],
8893            RelocationKind::Offset { origin, target, .. } => [Some(*origin), Some(*target)],
8894            RelocationKind::JumpTable { target_code, target_base } => [Some(*target_code), Some(*target_base)],
8895        }
8896    }
8897}
8898
/// Collects the relocations of a data `section` into `relocations`.
///
/// Single relocations are converted directly into a [`RelocationKind`].
/// RISC-V also emits *paired* relocations at the same offset (ADD/SUB,
/// SET/SUB, SET6/SUB6, SET_ULEB128/SUB_ULEB128); these are first grouped by
/// address and then fused into a single `RelocationKind` describing either a
/// jump table entry or an offset between two targets.
///
/// Returns an error for any relocation (or combination of relocations at one
/// address) that isn't supported.
fn harvest_data_relocations(
    elf: &Elf,
    code_sections_set: &HashSet<SectionIndex>,
    section: &Section,
    relocations: &mut BTreeMap<SectionTarget, RelocationKind>,
) -> Result<(), ProgramFromElfError> {
    // Whether a paired relocation adds or subtracts its target's address.
    #[derive(Debug)]
    enum MutOp {
        Add,
        Sub,
    }

    // Intermediate classification of a single ELF relocation, before the
    // pairing pass below decides what it actually means.
    #[derive(Debug)]
    enum Kind {
        // A self-contained relocation; needs no pairing.
        Set(RelocationKind),
        // One half of an ADD/SUB pair of the given size.
        Mut(MutOp, RelocationSize, SectionTarget),

        // Halves of an R_RISCV_SET6/R_RISCV_SUB6 pair.
        Set6 { target: SectionTarget },
        Sub6 { target: SectionTarget },

        // Halves of an R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 pair.
        SetUleb128 { target: SectionTarget },
        SubUleb128 { target: SectionTarget },
    }

    if section.relocations().next().is_none() {
        return Ok(());
    }

    let section_name = section.name();
    log::trace!("Harvesting data relocations from section: {}", section_name);

    // First pass: classify each relocation and group them by the (section
    // relative) address they patch, preserving their original order.
    let mut for_address = BTreeMap::new();
    for (absolute_address, relocation) in section.relocations() {
        // Relocation offsets are absolute in the ELF; make them section-relative.
        let Some(relative_address) = absolute_address.checked_sub(section.original_address()) else {
            return Err(ProgramFromElfError::other("invalid data relocation offset"));
        };

        if relocation.has_implicit_addend() {
            // AFAIK these should never be emitted for RISC-V.
            return Err(ProgramFromElfError::other(format!("unsupported relocation: {:?}", relocation)));
        }

        let Some(target) = get_relocation_target(elf, &relocation)? else {
            continue;
        };

        // Special case: an R_RISCV_PCREL_HI20 inside `.polkavm_exports` is
        // treated as a pointer-sized absolute reference to its target.
        // NOTE(review): presumably this is how export metadata refers to its
        // symbols — confirm against whatever emits `.polkavm_exports`.
        if relocation.flags()
            == (object::RelocationFlags::Elf {
                r_type: object::elf::R_RISCV_PCREL_HI20,
            })
            && section_name == ".polkavm_exports"
        {
            relocations.insert(
                SectionTarget {
                    section_index: section.index(),
                    offset: relative_address,
                },
                RelocationKind::Abs {
                    target,
                    size: if elf.is_64() { RelocationSize::U64 } else { RelocationSize::U32 },
                },
            );

            continue;
        }

        // Map the raw ELF relocation type to our intermediate `Kind`,
        // keeping the relocation's name around for logging.
        let (relocation_name, kind) = match (relocation.kind(), relocation.flags()) {
            (object::RelocationKind::Absolute, _)
                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 32 =>
            {
                (
                    "R_RISCV_32",
                    Kind::Set(RelocationKind::Abs {
                        target,
                        size: RelocationSize::U32,
                    }),
                )
            }
            (object::RelocationKind::Absolute, _)
                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 64 =>
            {
                (
                    "R_RISCV_64",
                    Kind::Set(RelocationKind::Abs {
                        target,
                        size: RelocationSize::U64,
                    }),
                )
            }

            (_, object::RelocationFlags::Elf { r_type: reloc_kind }) => match reloc_kind {
                object::elf::R_RISCV_SET6 => ("R_RISCV_SET6", Kind::Set6 { target }),
                object::elf::R_RISCV_SUB6 => ("R_RISCV_SUB6", Kind::Sub6 { target }),
                object::elf::R_RISCV_SET8 => (
                    "R_RISCV_SET8",
                    Kind::Set(RelocationKind::Abs {
                        target,
                        size: RelocationSize::U8,
                    }),
                ),
                object::elf::R_RISCV_SET16 => (
                    "R_RISCV_SET16",
                    Kind::Set(RelocationKind::Abs {
                        target,
                        size: RelocationSize::U16,
                    }),
                ),
                object::elf::R_RISCV_ADD8 => ("R_RISCV_ADD8", Kind::Mut(MutOp::Add, RelocationSize::U8, target)),
                object::elf::R_RISCV_SUB8 => ("R_RISCV_SUB8", Kind::Mut(MutOp::Sub, RelocationSize::U8, target)),
                object::elf::R_RISCV_ADD16 => ("R_RISCV_ADD16", Kind::Mut(MutOp::Add, RelocationSize::U16, target)),
                object::elf::R_RISCV_SUB16 => ("R_RISCV_SUB16", Kind::Mut(MutOp::Sub, RelocationSize::U16, target)),
                object::elf::R_RISCV_ADD32 => ("R_RISCV_ADD32", Kind::Mut(MutOp::Add, RelocationSize::U32, target)),
                object::elf::R_RISCV_ADD64 => ("R_RISCV_ADD64", Kind::Mut(MutOp::Add, RelocationSize::U64, target)),
                object::elf::R_RISCV_SUB32 => ("R_RISCV_SUB32", Kind::Mut(MutOp::Sub, RelocationSize::U32, target)),
                object::elf::R_RISCV_SUB64 => ("R_RISCV_SUB64", Kind::Mut(MutOp::Sub, RelocationSize::U64, target)),
                object::elf::R_RISCV_SET_ULEB128 => ("R_RISCV_SET_ULEB128", Kind::SetUleb128 { target }),
                object::elf::R_RISCV_SUB_ULEB128 => ("R_RISCV_SUB_ULEB128", Kind::SubUleb128 { target }),

                _ => {
                    return Err(ProgramFromElfError::other(format!(
                        "unsupported relocation in data section '{section_name}': {relocation:?}"
                    )))
                }
            },
            _ => {
                return Err(ProgramFromElfError::other(format!(
                    "unsupported relocation in data section '{section_name}': {relocation:?}"
                )))
            }
        };

        log::trace!("  {relocation_name}: {section_name}[0x{relative_address:x}] (0x{absolute_address:x}): -> {target}");
        for_address
            .entry(relative_address)
            .or_insert_with(Vec::new)
            .push((relocation_name, kind));
    }

    // Second pass: fuse the relocations which patch the same address.
    for (relative_address, list) in for_address {
        let current_location = SectionTarget {
            section_index: section.index(),
            offset: relative_address,
        };

        struct ErrorToken; // To make sure we don't forget a `continue` anywhere.
        let _: ErrorToken = match &*list {
            // A single self-contained relocation.
            [(_, Kind::Set(kind))] => {
                relocations.insert(current_location, *kind);
                continue;
            }
            // ADD + SUB pair where the added target is code and the subtracted
            // one isn't: a jump table entry.
            [(_, Kind::Mut(MutOp::Add, size_1, target_1)), (_, Kind::Mut(MutOp::Sub, size_2, target_2))]
                if size_1 == size_2
                    && matches!(*size_1, RelocationSize::U32 | RelocationSize::U64)
                    && code_sections_set.contains(&target_1.section_index)
                    && !code_sections_set.contains(&target_2.section_index) =>
            {
                if *size_1 == RelocationSize::U64 {
                    // We could support this, but I'm not sure if anything ever emits this,
                    // so let's return an error for now until somebody complains.
                    return Err(ProgramFromElfError::other(
                        "internal error: found 64-bit jump table relocation; please report this",
                    ));
                }

                relocations.insert(
                    current_location,
                    RelocationKind::JumpTable {
                        target_code: *target_1,
                        target_base: *target_2,
                    },
                );
                continue;
            }
            // Generic ADD + SUB pair: the offset `target_1 - target_2`.
            [(_, Kind::Mut(MutOp::Add, size_1, target_1)), (_, Kind::Mut(MutOp::Sub, size_2, target_2))] if size_1 == size_2 => {
                relocations.insert(
                    current_location,
                    RelocationKind::Offset {
                        origin: *target_2,
                        target: *target_1,
                        size: SizeRelocationSize::Generic(*size_1),
                    },
                );
                continue;
            }
            // SET + SUB pair: also an offset `target_1 - target_2`.
            [(
                _,
                Kind::Set(RelocationKind::Abs {
                    target: target_1,
                    size: size_1,
                }),
            ), (_, Kind::Mut(MutOp::Sub, size_2, target_2))]
                if size_1 == size_2 =>
            {
                relocations.insert(
                    current_location,
                    RelocationKind::Offset {
                        origin: *target_2,
                        target: *target_1,
                        size: SizeRelocationSize::Generic(*size_1),
                    },
                );
                continue;
            }
            // SET6 + SUB6 pair: a six-bit offset.
            [(_, Kind::Set6 { target: target_1 }), (_, Kind::Sub6 { target: target_2 })] => {
                relocations.insert(
                    current_location,
                    RelocationKind::Offset {
                        origin: *target_2,
                        target: *target_1,
                        size: SizeRelocationSize::SixBits,
                    },
                );
                continue;
            }
            // SET_ULEB128 + SUB_ULEB128 pair: a ULEB128-encoded offset.
            [(_, Kind::SetUleb128 { target: target_1 }), (_, Kind::SubUleb128 { target: target_2 })] => {
                relocations.insert(
                    current_location,
                    RelocationKind::Offset {
                        origin: *target_2,
                        target: *target_1,
                        size: SizeRelocationSize::Uleb128,
                    },
                );
                continue;
            }
            _ => ErrorToken,
        };

        // Any combination not matched above is unsupported.
        return Err(ProgramFromElfError::other(format!(
            "unsupported relocations for '{section_name}'[{relative_address:x}] (0x{absolute_address:08x}): {list}",
            absolute_address = section.original_address() + relative_address,
            list = SectionTarget::make_human_readable_in_debug_string(elf, &format!("{list:?}")),
        )));
    }

    Ok(())
}
9136
9137fn read_u32(data: &[u8], relative_address: u64) -> Result<u32, ProgramFromElfError> {
9138    let target_range = relative_address as usize..relative_address as usize + 4;
9139    let value = data
9140        .get(target_range)
9141        .ok_or(ProgramFromElfError::other("out of range relocation"))?;
9142    Ok(u32::from_le_bytes([value[0], value[1], value[2], value[3]]))
9143}
9144
9145fn read_u16(data: &[u8], relative_address: u64) -> Result<u16, ProgramFromElfError> {
9146    let target_range = relative_address as usize..relative_address as usize + 2;
9147    let value = data
9148        .get(target_range)
9149        .ok_or(ProgramFromElfError::other("out of range relocation"))?;
9150    Ok(u16::from_le_bytes([value[0], value[1]]))
9151}
9152
9153fn read_u8(data: &[u8], relative_address: u64) -> Result<u8, ProgramFromElfError> {
9154    data.get(relative_address as usize)
9155        .ok_or(ProgramFromElfError::other("out of range relocation"))
9156        .copied()
9157}
9158
9159/// ULEB128 encode `value` and overwrite the existing value at `data_offset`, keeping the length.
9160///
9161/// See the [ELF ABI spec] and [LLD implementation] for reference.
9162///
9163/// [ELF ABI spec]: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/fbf3cbbac00ef1860ae60302a9afedb98fd31109/riscv-elf.adoc#uleb128-note
9164/// [LLD implementation]: https://github.com/llvm/llvm-project/blob/release/18.x/lld/ELF/Target.h#L310
9165fn overwrite_uleb128(data: &mut [u8], mut data_offset: usize, mut value: u64) -> Result<(), ProgramFromElfError> {
9166    loop {
9167        let Some(byte) = data.get_mut(data_offset) else {
9168            return Err(ProgramFromElfError::other("ULEB128 relocation target offset out of bounds"));
9169        };
9170        data_offset += 1;
9171
9172        if *byte & 0x80 != 0 {
9173            *byte = 0x80 | (value as u8 & 0x7f);
9174            value >>= 7;
9175        } else {
9176            *byte = value as u8;
9177            return if value > 0x80 {
9178                Err(ProgramFromElfError::other("ULEB128 relocation overflow"))
9179            } else {
9180                Ok(())
9181            };
9182        }
9183    }
9184}
9185
#[test]
fn test_overwrite_uleb128() {
    // Multi-byte: 624485 encodes as [0xE5, 0x8E, 0x26]; the padded 3-byte
    // encoding already present in the buffer fixes the output length.
    let mut data = vec![0x80, 0x80, 0x00];
    overwrite_uleb128(&mut data, 0, 624485).unwrap();
    assert_eq!(data, vec![0xE5u8, 0x8E, 0x26]);

    // A small value written into a padded 2-byte encoding.
    let mut data = vec![0x80, 0x00];
    overwrite_uleb128(&mut data, 0, 0x7f).unwrap();
    assert_eq!(data, vec![0xFFu8, 0x00]);

    // Minimal single-byte encoding.
    let mut data = vec![0x00];
    overwrite_uleb128(&mut data, 0, 5).unwrap();
    assert_eq!(data, vec![0x05u8]);

    // A value which cannot fit in the existing length must be reported as an overflow.
    let mut data = vec![0x00];
    assert!(overwrite_uleb128(&mut data, 0, 0x200).is_err());
}
9196
9197fn write_u64(data: &mut [u8], relative_address: u64, value: u64) -> Result<(), ProgramFromElfError> {
9198    let value = value.to_le_bytes();
9199    data[relative_address as usize + 7] = value[7];
9200    data[relative_address as usize + 6] = value[6];
9201    data[relative_address as usize + 5] = value[5];
9202    data[relative_address as usize + 4] = value[4];
9203    data[relative_address as usize + 3] = value[3];
9204    data[relative_address as usize + 2] = value[2];
9205    data[relative_address as usize + 1] = value[1];
9206    data[relative_address as usize] = value[0];
9207    Ok(())
9208}
9209
9210fn write_u32(data: &mut [u8], relative_address: u64, value: u32) -> Result<(), ProgramFromElfError> {
9211    let value = value.to_le_bytes();
9212    data[relative_address as usize + 3] = value[3];
9213    data[relative_address as usize + 2] = value[2];
9214    data[relative_address as usize + 1] = value[1];
9215    data[relative_address as usize] = value[0];
9216    Ok(())
9217}
9218
9219fn write_u16(data: &mut [u8], relative_address: u64, value: u16) -> Result<(), ProgramFromElfError> {
9220    let value = value.to_le_bytes();
9221    data[relative_address as usize + 1] = value[1];
9222    data[relative_address as usize] = value[0];
9223    Ok(())
9224}
9225
9226fn harvest_code_relocations(
9227    elf: &Elf,
9228    section: &Section,
9229    decoder_config: &DecoderConfig,
9230    instruction_overrides: &mut HashMap<SectionTarget, InstExt<SectionTarget, SectionTarget>>,
9231    data_relocations: &mut BTreeMap<SectionTarget, RelocationKind>,
9232) -> Result<(), ProgramFromElfError> {
9233    fn jump_or_call<T>(ra: RReg, target: T, target_return: T) -> Result<ControlInst<T>, ProgramFromElfError> {
9234        if let Some(ra) = cast_reg_non_zero(ra)? {
9235            Ok(ControlInst::Call { ra, target, target_return })
9236        } else {
9237            Ok(ControlInst::Jump { target })
9238        }
9239    }
9240
    /// Which RISC-V "high 20 bits" relocation produced a `reloc_pcrel_hi20` entry;
    /// the GOT variant is resolved indirectly (through the symbol's address slot)
    /// in the pairing pass below.
    #[derive(Copy, Clone)]
    enum HiRelocKind {
        // From a R_RISCV_PCREL_HI20 relocation.
        PcRel,
        // From a R_RISCV_GOT_HI20 relocation.
        Got,
    }
9246
9247    impl core::fmt::Display for HiRelocKind {
9248        fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
9249            match self {
9250                HiRelocKind::PcRel => fmt.write_str("R_RISCV_PCREL_HI20"),
9251                HiRelocKind::Got => fmt.write_str("R_RISCV_GOT_HI20"),
9252            }
9253        }
9254    }
9255
    /// Bookkeeping for paired PC-relative relocations: every `*_PCREL_LO12_*`
    /// relocation refers back to a `*_HI20` relocation, and the pair is resolved
    /// together after the whole section has been scanned.
    #[derive(Default)]
    struct RelocPairs {
        // Section offset of the HI20 (AUIPC) -> (which HI20 kind, relocation target).
        reloc_pcrel_hi20: BTreeMap<u64, (HiRelocKind, SectionTarget)>,
        // Section offset of the LO12 use -> (relocation name for diagnostics, offset of its HI20).
        reloc_pcrel_lo12: BTreeMap<u64, (&'static str, u64)>,
    }
9261
9262    if section.relocations().next().is_none() {
9263        return Ok(());
9264    }
9265
9266    let mut pcrel_relocations = RelocPairs::default();
9267
9268    let section_name = section.name();
9269    log::trace!("Harvesting code relocations from section: {}", section_name);
9270
9271    let section_data = section.data();
9272    for (absolute_address, relocation) in section.relocations() {
9273        let Some(relative_address) = absolute_address.checked_sub(section.original_address()) else {
9274            return Err(ProgramFromElfError::other("invalid code relocation offset"));
9275        };
9276
9277        if relocation.has_implicit_addend() {
9278            // AFAIK these should never be emitted for RISC-V.
9279            return Err(ProgramFromElfError::other(format!(
9280                "unsupported relocation in section '{section_name}': {relocation:?}"
9281            )));
9282        }
9283
9284        let current_location = SectionTarget {
9285            section_index: section.index(),
9286            offset: relative_address,
9287        };
9288
9289        let relative_address = current_location.offset;
9290        let Some(target) = get_relocation_target(elf, &relocation)? else {
9291            continue;
9292        };
9293
9294        match (relocation.kind(), relocation.flags()) {
9295            (object::RelocationKind::Absolute, _)
9296                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 32 =>
9297            {
9298                data_relocations.insert(
9299                    current_location,
9300                    RelocationKind::Abs {
9301                        target,
9302                        size: RelocationSize::U32,
9303                    },
9304                );
9305            }
9306            (object::RelocationKind::Absolute, _)
9307                if relocation.encoding() == object::RelocationEncoding::Generic && relocation.size() == 64 =>
9308            {
9309                data_relocations.insert(
9310                    current_location,
9311                    RelocationKind::Abs {
9312                        target,
9313                        size: RelocationSize::U64,
9314                    },
9315                );
9316            }
9317            (_, object::RelocationFlags::Elf { r_type: reloc_kind }) => {
9318                // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/releases
9319                match reloc_kind {
9320                    object::elf::R_RISCV_CALL_PLT => {
9321                        // This relocation is for a pair of instructions, namely AUIPC + JALR, where we're allowed to delete the AUIPC if it's unnecessary.
9322                        let Some(xs) = section_data.get(current_location.offset as usize..current_location.offset as usize + 8) else {
9323                            return Err(ProgramFromElfError::other("invalid R_RISCV_CALL_PLT relocation"));
9324                        };
9325
9326                        let hi_inst_raw = u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]);
9327                        let Some(hi_inst) = Inst::decode(decoder_config, hi_inst_raw) else {
9328                            return Err(ProgramFromElfError::other(format!(
9329                                "R_RISCV_CALL_PLT for an unsupported instruction (1st): 0x{hi_inst_raw:08}"
9330                            )));
9331                        };
9332
9333                        let lo_inst_raw = u32::from_le_bytes([xs[4], xs[5], xs[6], xs[7]]);
9334                        let Some(lo_inst) = Inst::decode(decoder_config, lo_inst_raw) else {
9335                            return Err(ProgramFromElfError::other(format!(
9336                                "R_RISCV_CALL_PLT for an unsupported instruction (2nd): 0x{lo_inst_raw:08}"
9337                            )));
9338                        };
9339
9340                        let Inst::AddUpperImmediateToPc { dst: hi_reg, value: _ } = hi_inst else {
9341                            return Err(ProgramFromElfError::other(format!(
9342                                "R_RISCV_CALL_PLT for an unsupported instruction (1st): 0x{hi_inst_raw:08} ({hi_inst:?})"
9343                            )));
9344                        };
9345
9346                        let Inst::JumpAndLinkRegister {
9347                            dst: lo_dst,
9348                            base: lo_reg,
9349                            value: _,
9350                        } = lo_inst
9351                        else {
9352                            return Err(ProgramFromElfError::other(format!(
9353                                "R_RISCV_CALL_PLT for an unsupported instruction (2nd): 0x{lo_inst_raw:08} ({lo_inst:?})"
9354                            )));
9355                        };
9356
9357                        if hi_reg != lo_reg {
9358                            return Err(ProgramFromElfError::other(
9359                                "R_RISCV_CALL_PLT for a pair of instructions with different destination registers",
9360                            ));
9361                        }
9362
9363                        let target_return = current_location.add(8);
9364                        instruction_overrides.insert(current_location, InstExt::nop());
9365                        instruction_overrides.insert(
9366                            current_location.add(4),
9367                            InstExt::Control(jump_or_call(lo_dst, target, target_return)?),
9368                        );
9369
9370                        log::trace!(
9371                            "  R_RISCV_CALL_PLT: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
9372                            section.name(),
9373                            target
9374                        );
9375                    }
9376                    object::elf::R_RISCV_PCREL_HI20 => {
9377                        if let Some(raw_inst) = section_data
9378                            .get((relative_address as usize).wrapping_sub(4)..)
9379                            .and_then(|slice| slice.get(..4))
9380                        {
9381                            let raw_inst = u32::from_le_bytes([raw_inst[0], raw_inst[1], raw_inst[2], raw_inst[3]]);
9382                            if crate::riscv::R(raw_inst).unpack()
9383                                == (crate::riscv::OPCODE_CUSTOM_0, FUNC3_ECALLI, 0, RReg::Zero, RReg::Zero, RReg::Zero)
9384                            {
9385                                data_relocations.insert(
9386                                    current_location,
9387                                    RelocationKind::Abs {
9388                                        target,
9389                                        size: if elf.is_64() { RelocationSize::U64 } else { RelocationSize::U32 },
9390                                    },
9391                                );
9392                                continue;
9393                            }
9394                        }
9395
9396                        // This relocation is for an AUIPC.
9397                        pcrel_relocations
9398                            .reloc_pcrel_hi20
9399                            .insert(relative_address, (HiRelocKind::PcRel, target));
9400                        log::trace!(
9401                            "  R_RISCV_PCREL_HI20: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
9402                            section.name(),
9403                            target
9404                        );
9405                    }
9406                    object::elf::R_RISCV_GOT_HI20 => {
9407                        pcrel_relocations
9408                            .reloc_pcrel_hi20
9409                            .insert(relative_address, (HiRelocKind::Got, target));
9410                        log::trace!(
9411                            "  R_RISCV_GOT_HI20: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
9412                            section.name(),
9413                            target
9414                        );
9415                    }
9416                    object::elf::R_RISCV_PCREL_LO12_I => {
9417                        if target.section_index != section.index() {
9418                            return Err(ProgramFromElfError::other(
9419                                "R_RISCV_PCREL_LO12_I relocation points to a different section",
9420                            ));
9421                        }
9422
9423                        pcrel_relocations
9424                            .reloc_pcrel_lo12
9425                            .insert(relative_address, ("R_RISCV_PCREL_LO12_I", target.offset));
9426                        log::trace!(
9427                            "  R_RISCV_PCREL_LO12_I: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
9428                            section.name(),
9429                            target
9430                        );
9431                    }
9432                    object::elf::R_RISCV_PCREL_LO12_S => {
9433                        if target.section_index != section.index() {
9434                            return Err(ProgramFromElfError::other(
9435                                "R_RISCV_PCREL_LO12_I relocation points to a different section",
9436                            ));
9437                        }
9438
9439                        pcrel_relocations
9440                            .reloc_pcrel_lo12
9441                            .insert(relative_address, ("R_RISCV_PCREL_LO12_S", target.offset));
9442                        log::trace!(
9443                            "  R_RISCV_PCREL_LO12_S: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
9444                            section.name(),
9445                            target
9446                        );
9447                    }
9448                    object::elf::R_RISCV_JAL => {
9449                        let inst_raw = read_u32(section_data, relative_address)?;
9450                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
9451                            return Err(ProgramFromElfError::other(format!(
9452                                "R_RISCV_JAL for an unsupported instruction: 0x{inst_raw:08}"
9453                            )));
9454                        };
9455
9456                        let Inst::JumpAndLink { dst, .. } = inst else {
9457                            return Err(ProgramFromElfError::other(format!(
9458                                "R_RISCV_JAL for an unsupported instruction: 0x{inst_raw:08} ({inst:?})"
9459                            )));
9460                        };
9461
9462                        let target_return = current_location.add(4);
9463                        instruction_overrides.insert(current_location, InstExt::Control(jump_or_call(dst, target, target_return)?));
9464
9465                        log::trace!(
9466                            "  R_RISCV_JAL: {}[0x{relative_address:x}] (0x{absolute_address:x} -> {}",
9467                            section.name(),
9468                            target
9469                        );
9470                    }
9471                    object::elf::R_RISCV_BRANCH => {
9472                        let inst_raw = read_u32(section_data, relative_address)?;
9473                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
9474                            return Err(ProgramFromElfError::other(format!(
9475                                "R_RISCV_BRANCH for an unsupported instruction: 0x{inst_raw:08}"
9476                            )));
9477                        };
9478
9479                        let Inst::Branch { kind, src1, src2, .. } = inst else {
9480                            return Err(ProgramFromElfError::other(format!(
9481                                "R_RISCV_BRANCH for an unsupported instruction: 0x{inst_raw:08} ({inst:?})"
9482                            )));
9483                        };
9484
9485                        let target_false = current_location.add(4);
9486                        instruction_overrides.insert(
9487                            current_location,
9488                            InstExt::Control(ControlInst::Branch {
9489                                kind,
9490                                src1: cast_reg_any(src1)?,
9491                                src2: cast_reg_any(src2)?,
9492                                target_true: target,
9493                                target_false,
9494                            }),
9495                        );
9496
9497                        log::trace!(
9498                            "  R_RISCV_BRANCH: {}[0x{relative_address:x}] (0x{absolute_address:x} -> {}",
9499                            section.name(),
9500                            target
9501                        );
9502                    }
9503                    object::elf::R_RISCV_HI20 => {
9504                        // This relocation is for a LUI.
9505                        let inst_raw = read_u32(section_data, relative_address)?;
9506                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
9507                            return Err(ProgramFromElfError::other(format!(
9508                                "R_RISCV_HI20 for an unsupported instruction: 0x{inst_raw:08}"
9509                            )));
9510                        };
9511
9512                        let Inst::LoadUpperImmediate { dst, value: _ } = inst else {
9513                            return Err(ProgramFromElfError::other(format!(
9514                                "R_RISCV_HI20 for an unsupported instruction: 0x{inst_raw:08} ({inst:?})"
9515                            )));
9516                        };
9517
9518                        let Some(dst) = cast_reg_non_zero(dst)? else {
9519                            return Err(ProgramFromElfError::other("R_RISCV_HI20 with a zero destination register"));
9520                        };
9521
9522                        instruction_overrides.insert(current_location, InstExt::Basic(BasicInst::LoadAddress { dst, target }));
9523
9524                        log::trace!(
9525                            "  R_RISCV_HI20: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
9526                            section.name(),
9527                            target
9528                        );
9529
9530                        continue;
9531                    }
9532                    object::elf::R_RISCV_LO12_I => {
9533                        let inst_raw = read_u32(section_data, relative_address)?;
9534                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
9535                            return Err(ProgramFromElfError::other(format!(
9536                                "R_RISCV_LO12_I for an unsupported instruction: 0x{inst_raw:08}"
9537                            )));
9538                        };
9539
9540                        let new_instruction = match inst {
9541                            Inst::RegImm {
9542                                kind: RegImmKind::Add32,
9543                                dst,
9544                                src: _,
9545                                imm: _,
9546                            } => {
9547                                if let Some(dst) = cast_reg_non_zero(dst)? {
9548                                    InstExt::Basic(BasicInst::LoadAddress { dst, target })
9549                                } else {
9550                                    InstExt::nop()
9551                                }
9552                            }
9553                            Inst::RegImm {
9554                                kind: RegImmKind::Add64,
9555                                dst,
9556                                src: _,
9557                                imm: _,
9558                            } => {
9559                                if let Some(dst) = cast_reg_non_zero(dst)? {
9560                                    InstExt::Basic(BasicInst::LoadAddress { dst, target })
9561                                } else {
9562                                    InstExt::nop()
9563                                }
9564                            }
9565                            Inst::Load {
9566                                kind,
9567                                dst,
9568                                base: _,
9569                                offset: _,
9570                            } => {
9571                                if let Some(dst) = cast_reg_non_zero(dst)? {
9572                                    InstExt::Basic(BasicInst::LoadAbsolute { kind, dst, target })
9573                                } else {
9574                                    InstExt::nop()
9575                                }
9576                            }
9577                            _ => {
9578                                return Err(ProgramFromElfError::other(format!(
9579                                    "R_RISCV_LO12_I for an unsupported instruction: 0x{inst_raw:08} ({inst:?}) (at {loc})",
9580                                    loc = current_location.fmt_human_readable(elf),
9581                                )));
9582                            }
9583                        };
9584
9585                        instruction_overrides.insert(current_location, new_instruction);
9586
9587                        log::trace!(
9588                            "  R_RISCV_LO12_I: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
9589                            section.name(),
9590                            target
9591                        );
9592                    }
9593                    object::elf::R_RISCV_LO12_S => {
9594                        let inst_raw = read_u32(section_data, relative_address)?;
9595                        let Some(inst) = Inst::decode(decoder_config, inst_raw) else {
9596                            return Err(ProgramFromElfError::other(format!(
9597                                "R_RISCV_LO12_S for an unsupported instruction: 0x{inst_raw:08}"
9598                            )));
9599                        };
9600
9601                        let new_instruction = match inst {
9602                            Inst::Store {
9603                                kind,
9604                                src,
9605                                base: _,
9606                                offset: _,
9607                            } => InstExt::Basic(BasicInst::StoreAbsolute {
9608                                kind,
9609                                src: cast_reg_any(src)?,
9610                                target,
9611                            }),
9612                            _ => {
9613                                return Err(ProgramFromElfError::other(format!(
9614                                    "R_RISCV_LO12_S for an unsupported instruction: 0x{inst_raw:08} ({inst:?}) (at {loc})",
9615                                    loc = current_location.fmt_human_readable(elf),
9616                                )));
9617                            }
9618                        };
9619
9620                        instruction_overrides.insert(current_location, new_instruction);
9621
9622                        log::trace!(
9623                            "  R_RISCV_LO12_S: {}[0x{relative_address:x}] (0x{absolute_address:x}): -> {}",
9624                            section.name(),
9625                            target
9626                        );
9627                    }
9628                    object::elf::R_RISCV_RVC_JUMP => {
9629                        let inst_raw = read_u16(section_data, relative_address)?;
9630                        let Some(inst) = Inst::decode(decoder_config, inst_raw.into()) else {
9631                            return Err(ProgramFromElfError::other(format!(
9632                                "R_RISCV_RVC_JUMP for an unsupported instruction: 0x{inst_raw:04}"
9633                            )));
9634                        };
9635
9636                        let (Inst::JumpAndLink { dst, .. } | Inst::JumpAndLinkRegister { dst, .. }) = inst else {
9637                            return Err(ProgramFromElfError::other(format!(
9638                                "R_RISCV_RVC_JUMP for an unsupported instruction: 0x{inst_raw:04} ({inst:?})"
9639                            )));
9640                        };
9641
9642                        let target_return = current_location.add(2);
9643                        instruction_overrides.insert(current_location, InstExt::Control(jump_or_call(dst, target, target_return)?));
9644
9645                        log::trace!(
9646                            "  R_RISCV_RVC_JUMP: {}[0x{relative_address:x}] (0x{absolute_address:x} -> {}",
9647                            section.name(),
9648                            target
9649                        );
9650                    }
9651                    object::elf::R_RISCV_RVC_BRANCH => {
9652                        let inst_raw = read_u16(section_data, relative_address)?;
9653                        let Some(inst) = Inst::decode(decoder_config, inst_raw.into()) else {
9654                            return Err(ProgramFromElfError::other(format!(
9655                                "R_RISCV_RVC_BRANCH for an unsupported instruction: 0x{inst_raw:04}"
9656                            )));
9657                        };
9658
9659                        let Inst::Branch { kind, src1, src2, .. } = inst else {
9660                            return Err(ProgramFromElfError::other(format!(
9661                                "R_RISCV_BRANCH for an unsupported instruction: 0x{inst_raw:04} ({inst:?})"
9662                            )));
9663                        };
9664
9665                        let target_false = current_location.add(2);
9666                        instruction_overrides.insert(
9667                            current_location,
9668                            InstExt::Control(ControlInst::Branch {
9669                                kind,
9670                                src1: cast_reg_any(src1)?,
9671                                src2: cast_reg_any(src2)?,
9672                                target_true: target,
9673                                target_false,
9674                            }),
9675                        );
9676
9677                        log::trace!(
9678                            "  R_RISCV_RVC_BRANCH: {}[0x{relative_address:x}] (0x{absolute_address:x} -> {}",
9679                            section.name(),
9680                            target
9681                        );
9682                    }
9683                    object::elf::R_RISCV_RELAX => {}
9684                    _ => {
9685                        return Err(ProgramFromElfError::other(format!(
9686                            "unsupported relocation type in section '{}': 0x{:08x}",
9687                            section.name(),
9688                            reloc_kind
9689                        )));
9690                    }
9691                }
9692            }
9693            _ => {
9694                return Err(ProgramFromElfError::other(format!(
9695                    "unsupported relocation in code section '{}': {:?}",
9696                    section.name(),
9697                    relocation
9698                )))
9699            }
9700        }
9701    }
9702
9703    for (relative_lo, (lo_rel_name, relative_hi)) in pcrel_relocations.reloc_pcrel_lo12 {
9704        let lo_inst_raw = &section_data[relative_lo as usize..][..4];
9705        let lo_inst_raw = u32::from_le_bytes([lo_inst_raw[0], lo_inst_raw[1], lo_inst_raw[2], lo_inst_raw[3]]);
9706        let lo_inst = Inst::decode(decoder_config, lo_inst_raw);
9707        let hi_inst_raw = &section_data[relative_hi as usize..][..4];
9708        let hi_inst_raw = u32::from_le_bytes([hi_inst_raw[0], hi_inst_raw[1], hi_inst_raw[2], hi_inst_raw[3]]);
9709        let hi_inst = Inst::decode(decoder_config, hi_inst_raw);
9710
9711        let Some((hi_kind, target)) = pcrel_relocations.reloc_pcrel_hi20.get(&relative_hi).copied() else {
9712            return Err(ProgramFromElfError::other(format!("{lo_rel_name} relocation at '{section_name}'0x{relative_lo:x} targets '{section_name}'0x{relative_hi:x} which doesn't have a R_RISCV_PCREL_HI20 or R_RISCV_GOT_HI20 relocation")));
9713        };
9714
9715        let Some(hi_inst) = hi_inst else {
9716            return Err(ProgramFromElfError::other(format!(
9717                "{hi_kind} relocation for an unsupported instruction at '{section_name}'0x{relative_hi:x}: 0x{hi_inst_raw:08x}"
9718            )));
9719        };
9720
9721        let Inst::AddUpperImmediateToPc { dst: hi_reg, .. } = hi_inst else {
9722            return Err(ProgramFromElfError::other(format!(
9723                "{hi_kind} relocation for an unsupported instruction at '{section_name}'[0x{relative_hi:x}]: {hi_inst:?}"
9724            )));
9725        };
9726
9727        let Some(lo_inst) = lo_inst else {
9728            return Err(ProgramFromElfError::other(format!(
9729                "{lo_rel_name} relocation for an unsupported instruction: 0x{lo_inst_raw:08x}"
9730            )));
9731        };
9732
9733        let (lo_reg, new_instruction) = if matches!(hi_kind, HiRelocKind::Got) {
9734            // For these relocations the target address points to the symbol that the code wants to reference,
9735            // but the actual address that's in the code shouldn't point to the symbol directly, but to a place
9736            // where the symbol's address can be found.
9737
9738            match lo_inst {
9739                Inst::Load {
9740                    kind: LoadKind::U64,
9741                    base,
9742                    dst,
9743                    ..
9744                } if elf.is_64() => {
9745                    let Some(dst) = cast_reg_non_zero(dst)? else {
9746                        return Err(ProgramFromElfError::other(format!(
9747                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
9748                        )));
9749                    };
9750
9751                    (base, InstExt::Basic(BasicInst::LoadAddressIndirect { dst, target }))
9752                }
9753                Inst::Load {
9754                    kind: LoadKind::I32,
9755                    base,
9756                    dst,
9757                    ..
9758                } => {
9759                    let Some(dst) = cast_reg_non_zero(dst)? else {
9760                        return Err(ProgramFromElfError::other(format!(
9761                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
9762                        )));
9763                    };
9764
9765                    (base, InstExt::Basic(BasicInst::LoadAddressIndirect { dst, target }))
9766                }
9767                _ => {
9768                    return Err(ProgramFromElfError::other(format!(
9769                        "{lo_rel_name} relocation (with {hi_kind} as the upper relocation) for an unsupported instruction: {lo_inst:?}"
9770                    )));
9771                }
9772            }
9773        } else {
9774            match lo_inst {
9775                Inst::RegImm {
9776                    kind: RegImmKind::Add32,
9777                    src,
9778                    dst,
9779                    ..
9780                } if !elf.is_64() => {
9781                    let Some(dst) = cast_reg_non_zero(dst)? else {
9782                        return Err(ProgramFromElfError::other(format!(
9783                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
9784                        )));
9785                    };
9786
9787                    (src, InstExt::Basic(BasicInst::LoadAddress { dst, target }))
9788                }
9789                Inst::RegImm {
9790                    kind: RegImmKind::Add64,
9791                    src,
9792                    dst,
9793                    ..
9794                } if elf.is_64() => {
9795                    let Some(dst) = cast_reg_non_zero(dst)? else {
9796                        return Err(ProgramFromElfError::other(format!(
9797                            "{lo_rel_name} with a zero destination register: 0x{lo_inst_raw:08x} in {section_name}[0x{relative_lo:08x}]"
9798                        )));
9799                    };
9800
9801                    (src, InstExt::Basic(BasicInst::LoadAddress { dst, target }))
9802                }
9803                Inst::Load { kind, base, dst, .. } => {
9804                    if let Some(dst) = cast_reg_non_zero(dst)? {
9805                        (base, InstExt::Basic(BasicInst::LoadAbsolute { kind, dst, target }))
9806                    } else {
9807                        (base, InstExt::nop())
9808                    }
9809                }
9810                Inst::Store { kind, base, src, .. } => (
9811                    base,
9812                    InstExt::Basic(BasicInst::StoreAbsolute {
9813                        kind,
9814                        src: cast_reg_any(src)?,
9815                        target,
9816                    }),
9817                ),
9818                _ => {
9819                    return Err(ProgramFromElfError::other(format!(
9820                        "{lo_rel_name} relocation (with {hi_kind} as the upper relocation) for an unsupported instruction: {lo_inst:?}"
9821                    )));
9822                }
9823            }
9824        };
9825
9826        if lo_reg != hi_reg {
9827            // NOTE: These *can* apparently be sometimes different, so it's not an error if this happens.
9828            //
9829            // I've seen a case where the whole thing looked roughly like this:
9830            //
9831            //   auipc   a1,0x2057        # HI
9832            //   sw      a1,4(sp)         # Stash the HI part on the stack
9833            //   lw      a1,-460(a1)      # LO (1)
9834            //   ... a bunch of code ...
9835            //   lw      a2,4(sp)         # Reload the HI port from the stack (note different register)
9836            //   sw      a0,-460(a2)      # LO (2)
9837            log::trace!(
9838                "{lo_rel_name} + {hi_kind} relocation pair in '{section_name}' [+0x{relative_lo:x}, +0x{relative_hi:x}] uses different destination registers ({lo_reg:?} and {hi_reg:?})",
9839            );
9840        }
9841
9842        let location_hi = SectionTarget {
9843            section_index: section.index(),
9844            offset: relative_hi,
9845        };
9846        let location_lo = SectionTarget {
9847            section_index: section.index(),
9848            offset: relative_lo,
9849        };
9850
9851        // Since we support full length immediates just turn the upper instructions into a NOP.
9852        instruction_overrides.insert(location_hi, InstExt::nop());
9853        instruction_overrides.insert(location_lo, new_instruction);
9854    }
9855
9856    Ok(())
9857}
9858
9859fn parse_function_symbols(elf: &Elf) -> Result<Vec<(Source, String)>, ProgramFromElfError> {
9860    let mut functions = Vec::new();
9861    for sym in elf.symbols() {
9862        match sym.kind() {
9863            object::elf::STT_FUNC => {
9864                if sym.is_undefined() {
9865                    continue;
9866                }
9867
9868                let target = sym.section_target()?;
9869                let Some(name) = sym.name() else { continue };
9870
9871                if name.is_empty() {
9872                    continue;
9873                }
9874
9875                let source = Source {
9876                    section_index: target.section_index,
9877                    offset_range: (target.offset..target.offset + sym.size()).into(),
9878                };
9879
9880                functions.push((source, name.to_owned()));
9881            }
9882            object::elf::STT_NOTYPE | object::elf::STT_OBJECT | object::elf::STT_SECTION | object::elf::STT_FILE => {}
9883            kind => return Err(ProgramFromElfError::other(format!("unsupported symbol type: {}", kind))),
9884        }
9885    }
9886
9887    functions.sort_unstable_by_key(|(source, _)| *source);
9888    functions.dedup_by_key(|(source, _)| *source);
9889
9890    Ok(functions)
9891}
9892
/// The optimization level used when linking.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum OptLevel {
    /// No optimizations; only NOP elimination is performed and every block is kept.
    O0,
    /// Light optimizations: reachability analysis plus per-block NOP elimination
    /// and meta-instruction lowering, without the full optimizer pass.
    O1,
    /// Full optimizations (the default).
    O2,
    /// Like `O2`, plus experimental optimizations.
    Oexperimental,
}
9900
/// Configuration for the ELF-to-PolkaVM linking process.
pub struct Config {
    // Whether to strip the output (presumably debug info/symbols — semantics defined
    // by the emit stage; confirm against the consumer of this flag).
    strip: bool,
    // How aggressively the program graph is optimized; see `OptLevel`.
    opt_level: OptLevel,
    // Threshold used by the optimizer when deciding whether to inline — TODO confirm units.
    inline_threshold: usize,
    // Whether the optimizer may remove loads it deems unnecessary.
    elide_unnecessary_loads: bool,
    // Entries of the dispatch table; each entry is a raw symbol name — verify against callers.
    dispatch_table: Vec<Vec<u8>>,
    // Minimum stack size (in bytes) passed to the memory configuration.
    min_stack_size: u32,
    // Enables the gas-cost-model-aware pass that marks zero-cost `unlikely` blocks.
    gas_cost_model_aware_optimizations: bool,
}
9910
impl Default for Config {
    /// Default configuration: `O2` optimization, load elision and gas-cost-model
    /// aware optimizations enabled, no stripping, an inline threshold of 2, an
    /// empty dispatch table, and a two-page minimum stack.
    fn default() -> Self {
        Config {
            strip: false,
            opt_level: OptLevel::O2,
            inline_threshold: 2,
            elide_unnecessary_loads: true,
            dispatch_table: Vec::new(),
            min_stack_size: VM_MIN_PAGE_SIZE * 2,
            gas_cost_model_aware_optimizations: true,
        }
    }
}
9924
impl Config {
    /// Sets whether the output should be stripped. Returns `&mut Self` for chaining.
    pub fn set_strip(&mut self, value: bool) -> &mut Self {
        self.strip = value;
        self
    }

    /// Convenience toggle: `true` selects [`OptLevel::O2`], `false` selects [`OptLevel::O0`].
    pub fn set_optimize(&mut self, value: bool) -> &mut Self {
        self.opt_level = if value { OptLevel::O2 } else { OptLevel::O0 };
        self
    }

    /// Sets the optimization level explicitly.
    pub fn set_opt_level(&mut self, value: OptLevel) -> &mut Self {
        self.opt_level = value;
        self
    }

    /// Sets the optimizer's inlining threshold.
    pub fn set_inline_threshold(&mut self, value: usize) -> &mut Self {
        self.inline_threshold = value;
        self
    }

    /// Sets whether the optimizer may elide loads it deems unnecessary.
    pub fn set_elide_unnecessary_loads(&mut self, value: bool) -> &mut Self {
        self.elide_unnecessary_loads = value;
        self
    }

    /// Sets the dispatch table; each entry is a raw symbol name as bytes.
    pub fn set_dispatch_table(&mut self, dispatch_table: Vec<Vec<u8>>) -> &mut Self {
        self.dispatch_table = dispatch_table;
        self
    }

    /// Sets the minimum stack size, in bytes.
    pub fn set_min_stack_size(&mut self, value: u32) -> &mut Self {
        self.min_stack_size = value;
        self
    }

    /// Enables or disables the gas-cost-model-aware optimization pass.
    pub fn set_enable_gas_cost_model_aware_optimizations(&mut self, value: bool) -> &mut Self {
        self.gas_cost_model_aware_optimizations = value;
        self
    }
}
9966
/// The instruction set to target when emitting the program blob.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum TargetInstructionSet {
    /// The Revive V1 ISA. Requires a 64-bit ELF input.
    ReviveV1,
    /// The JAM V1 ISA. Requires a 64-bit ELF input.
    JamV1,
    /// The latest ISA; resolves to the 32-bit or 64-bit variant based on the input ELF.
    Latest,
}
9974
/// Links the given ELF `data` into a PolkaVM program blob, returned as raw bytes.
///
/// The input is first parsed as a 32-bit little-endian ELF; if that fails
/// specifically due to an unsupported header, parsing is retried as 64-bit.
pub fn program_from_elf(config: Config, isa: TargetInstructionSet, data: &[u8]) -> Result<Vec<u8>, ProgramFromElfError> {
    match Elf::parse::<object::elf::FileHeader32<object::endian::LittleEndian>>(data) {
        Ok(elf) => program_from_elf_internal(config, isa, elf),
        // NOTE(review): matching on the stringified error is brittle — if the
        // underlying parser ever rewords "Unsupported ELF header" the 64-bit
        // fallback silently stops triggering. Consider matching a structured
        // error kind instead; confirm what variants `FailedToParseElf` can carry.
        Err(ProgramFromElfError(ProgramFromElfErrorKind::FailedToParseElf(e))) if e.to_string() == "Unsupported ELF header" => {
            let elf = Elf::parse::<object::elf::FileHeader64<object::endian::LittleEndian>>(data)?;
            program_from_elf_internal(config, isa, elf)
        }
        Err(e) => Err(e),
    }
}
9985
9986fn program_from_elf_internal(config: Config, isa: TargetInstructionSet, mut elf: Elf) -> Result<Vec<u8>, ProgramFromElfError> {
9987    let is_rv64 = elf.is_64();
9988    let bitness = if is_rv64 { Bitness::B64 } else { Bitness::B32 };
9989
9990    let isa = match isa {
9991        TargetInstructionSet::ReviveV1 => {
9992            if !is_rv64 {
9993                return Err(ProgramFromElfError::other("the ReviveV1 ISA only supports 64-bit"));
9994            }
9995
9996            InstructionSetKind::ReviveV1
9997        }
9998        TargetInstructionSet::JamV1 => {
9999            if !is_rv64 {
10000                return Err(ProgramFromElfError::other("the JamV1 ISA only supports 64-bit"));
10001            }
10002
10003            InstructionSetKind::JamV1
10004        }
10005        TargetInstructionSet::Latest => {
10006            if is_rv64 {
10007                InstructionSetKind::Latest64
10008            } else {
10009                InstructionSetKind::Latest32
10010            }
10011        }
10012    };
10013
10014    if elf.section_by_name(".got").next().is_none() {
10015        elf.add_empty_data_section(".got");
10016    }
10017
10018    let mut decoder_config = DecoderConfig::new_32bit();
10019    decoder_config.set_rv64(elf.is_64());
10020
10021    let mut sections_ro_data = Vec::new();
10022    let mut sections_rw_data = Vec::new();
10023    let mut sections_bss = Vec::new();
10024    let mut sections_code = Vec::new();
10025    let mut sections_metadata = Vec::new();
10026    let mut sections_exports = Vec::new();
10027    let mut sections_min_stack_size = Vec::new();
10028    let mut sections_other = Vec::new();
10029
10030    let mut section_map = RangeMap::new();
10031
10032    log::trace!("ELF sections:");
10033    for section in elf.sections() {
10034        let name = section.name();
10035        let is_writable = section.is_writable();
10036        let kind = section.elf_section_type();
10037
10038        log::trace!(
10039            " {}: 0x{:08x}..0x{:08x}: {} [ty={}] ({} bytes)",
10040            section.index(),
10041            section.original_address(),
10042            section.original_address() + section.size(),
10043            name,
10044            kind,
10045            section.size()
10046        );
10047
10048        if section.is_allocated() && section.original_address() != 0 {
10049            section_map.insert(
10050                section.original_address()..section.original_address() + section.size(),
10051                section.index(),
10052            );
10053        }
10054
10055        if name == ".rodata"
10056            || name.starts_with(".rodata.")
10057            || name.starts_with(".srodata.")
10058            || name == ".data.rel.ro"
10059            || name.starts_with(".data.rel.ro.")
10060            || name == ".got"
10061            || name == ".got.plt"
10062            || name == ".relro_padding"
10063        {
10064            if name == ".rodata" && is_writable {
10065                return Err(ProgramFromElfError::other(format!(
10066                    "expected section '{name}' to be read-only, yet it is writable"
10067                )));
10068            }
10069
10070            sections_ro_data.push(section.index());
10071        } else if name == ".data" || name.starts_with(".data.") || name == ".sdata" || name.starts_with(".sdata.") {
10072            if !is_writable {
10073                return Err(ProgramFromElfError::other(format!(
10074                    "expected section '{name}' to be writable, yet it is read-only"
10075                )));
10076            }
10077
10078            sections_rw_data.push(section.index());
10079        } else if name == ".bss" || name.starts_with(".bss.") || name == ".sbss" || name.starts_with(".sbss.") {
10080            if !is_writable {
10081                return Err(ProgramFromElfError::other(format!(
10082                    "expected section '{name}' to be writable, yet it is read-only"
10083                )));
10084            }
10085
10086            sections_bss.push(section.index());
10087        } else if name == ".text" || name.starts_with(".text.") || (section.is_allocated() && section.is_executable()) {
10088            if is_writable {
10089                return Err(ProgramFromElfError::other(format!(
10090                    "expected section '{name}' to be read-only, yet it is writable"
10091                )));
10092            }
10093
10094            sections_code.push(section.index());
10095        } else if name == ".polkavm_metadata" {
10096            sections_metadata.push(section.index());
10097        } else if name == ".polkavm_exports" {
10098            sections_exports.push(section.index());
10099        } else if name == ".polkavm_min_stack_size" {
10100            sections_min_stack_size.push(section.index());
10101        } else if name == ".eh_frame" || name == ".got" || name == ".dynsym" || name == ".dynstr" || name == ".dynamic" {
10102            continue;
10103        } else if section.is_allocated() {
10104            if matches!(
10105                kind,
10106                object::elf::SHT_HASH
10107                    | object::elf::SHT_GNU_HASH
10108                    | object::elf::SHT_DYNSYM
10109                    | object::elf::SHT_STRTAB
10110                    | object::elf::SHT_RELA
10111            ) {
10112                log::trace!(" {}: '{name}': skipping", section.index());
10113                continue;
10114            }
10115
10116            if section.is_progbits() && !section.is_writable() && !section.is_executable() {
10117                sections_ro_data.push(section.index());
10118                log::trace!(" {}: '{name}': autodetected as RO data", section.index());
10119                continue;
10120            }
10121
10122            if section.is_progbits() && section.is_writable() && !section.is_executable() {
10123                sections_rw_data.push(section.index());
10124                log::trace!(" {}: '{name}': autodetected as RW data", section.index());
10125                continue;
10126            }
10127
10128            if section.is_nobits() && section.is_writable() && !section.is_executable() {
10129                sections_bss.push(section.index());
10130                log::trace!(" {}: '{name}': autodetected as BSS", section.index());
10131                continue;
10132            }
10133
10134            if section.is_progbits() && !section.is_writable() && section.is_executable() {
10135                sections_code.push(section.index());
10136                log::trace!(" {}: '{name}': autodetected as code", section.index());
10137                continue;
10138            }
10139
10140            // We're supposed to load this section into memory at runtime, but we don't know what it is.
10141            return Err(ProgramFromElfErrorKind::UnsupportedSection(name.to_owned()).into());
10142        } else {
10143            sections_other.push(section.index());
10144        }
10145    }
10146
10147    if sections_code.is_empty() {
10148        return Err(ProgramFromElfError::other(
10149            "the program contains no code (linking empty programs is not supported!)",
10150        ));
10151    }
10152
10153    let section_regspill = elf.add_empty_data_section(".regspill");
10154    sections_rw_data.insert(0, section_regspill);
10155
10156    let code_sections_set: HashSet<SectionIndex> = sections_code.iter().copied().collect();
10157    let data_sections = sections_ro_data
10158        .iter()
10159        .chain(sections_rw_data.iter())
10160        .chain(sections_bss.iter()) // Shouldn't need relocations, but just in case.
10161        .chain(sections_other.iter())
10162        .chain(sections_metadata.iter())
10163        .chain(sections_exports.iter())
10164        .copied();
10165
10166    let mut relocations = BTreeMap::new();
10167    for section_index in data_sections {
10168        let section = elf.section_by_index(section_index);
10169        harvest_data_relocations(&elf, &code_sections_set, section, &mut relocations)?;
10170    }
10171
10172    let mut instruction_overrides = HashMap::new();
10173    for &section_index in &sections_code {
10174        let section = elf.section_by_index(section_index);
10175        harvest_code_relocations(&elf, section, &decoder_config, &mut instruction_overrides, &mut relocations)?;
10176    }
10177
10178    let exports = sections_exports
10179        .iter()
10180        .map(|&section_index| {
10181            let section = elf.section_by_index(section_index);
10182            extract_exports(&elf, &relocations, section)
10183        })
10184        .collect::<Result<Vec<_>, _>>()?;
10185    let mut exports: Vec<_> = exports.into_iter().flatten().collect();
10186
10187    let mut instructions = Vec::new();
10188    let mut imports = Vec::new();
10189    let mut metadata_to_nth_import = HashMap::new();
10190
10191    for &section_index in &sections_code {
10192        let section = elf.section_by_index(section_index);
10193        let initial_instruction_count = instructions.len();
10194        parse_code_section(
10195            &elf,
10196            section,
10197            &decoder_config,
10198            &relocations,
10199            &mut imports,
10200            &mut metadata_to_nth_import,
10201            &mut instruction_overrides,
10202            &mut instructions,
10203            config.opt_level,
10204        )?;
10205
10206        if instructions.len() > initial_instruction_count {
10207            // Sometimes a section ends with a `call`, which (considering sections can be reordered) would put
10208            // the return address out of bounds of the section, so let's inject an `unimp` here to make sure this doesn't happen.
10209            //
10210            // If it ends up being unnecessary the optimizer will remove it anyway.
10211            let last_source = instructions.last().unwrap().0;
10212            let source = Source {
10213                section_index: last_source.section_index,
10214                offset_range: (last_source.offset_range.end..last_source.offset_range.end + 4).into(),
10215            };
10216            instructions.push((source, InstExt::Control(ControlInst::Unimplemented)));
10217        }
10218    }
10219
10220    if !instruction_overrides.is_empty() {
10221        return Err(ProgramFromElfError::other("internal error: instruction overrides map is not empty"));
10222    }
10223
10224    core::mem::drop(instruction_overrides);
10225
10226    assert!(instructions
10227        .iter()
10228        .all(|(source, _)| source.offset_range.start < source.offset_range.end));
10229
10230    {
10231        let strip_relocations_for_sections: HashSet<_> =
10232            sections_metadata.iter().copied().chain(sections_exports.iter().copied()).collect();
10233
10234        relocations.retain(|relocation_target, _| !strip_relocations_for_sections.contains(&relocation_target.section_index));
10235    }
10236
10237    let data_sections_set: HashSet<SectionIndex> = sections_ro_data
10238        .iter()
10239        .chain(sections_rw_data.iter())
10240        .chain(sections_bss.iter()) // Shouldn't need relocations, but just in case.
10241        .copied()
10242        .collect();
10243
10244    let section_to_function_name = elf.section_to_function_name();
10245    let all_jump_targets = harvest_all_jump_targets(&elf, &data_sections_set, &code_sections_set, &instructions, &relocations, &exports)?;
10246    let all_blocks = split_code_into_basic_blocks(&elf, &section_to_function_name, &all_jump_targets, instructions)?;
10247    for block in &all_blocks {
10248        for source in block.next.source.as_slice() {
10249            assert!(source.offset_range.start < source.offset_range.end);
10250        }
10251    }
10252
10253    let mut section_to_block = build_section_to_block_map(&all_blocks)?;
10254    let mut all_blocks = resolve_basic_block_references(&data_sections_set, &section_to_block, &all_blocks)?;
10255    let mut reachability_graph;
10256    let mut used_blocks;
10257
10258    let mut regspill_size = 0;
10259    let mut info_for_block = Vec::new();
10260    if matches!(config.opt_level, OptLevel::O1 | OptLevel::O2 | OptLevel::Oexperimental) {
10261        reachability_graph = calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations)?;
10262        if matches!(config.opt_level, OptLevel::O2 | OptLevel::Oexperimental) {
10263            info_for_block = optimize_program(&config, &elf, isa, &imports, &mut all_blocks, &mut reachability_graph, &mut exports);
10264        } else {
10265            for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
10266                perform_nop_elimination(&mut all_blocks, current);
10267                perform_meta_instruction_lowering(is_rv64, &mut all_blocks, current);
10268            }
10269        }
10270        used_blocks = collect_used_blocks(&all_blocks, &reachability_graph);
10271        spill_fake_registers(
10272            section_regspill,
10273            &mut all_blocks,
10274            &mut reachability_graph,
10275            &imports,
10276            &used_blocks,
10277            &mut regspill_size,
10278            is_rv64,
10279        );
10280        used_blocks = add_missing_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, used_blocks);
10281        merge_consecutive_fallthrough_blocks(&mut all_blocks, &mut reachability_graph, &mut section_to_block, &mut used_blocks);
10282        if matches!(config.opt_level, OptLevel::O2 | OptLevel::Oexperimental) {
10283            replace_immediates_with_registers(&mut all_blocks, &imports, &used_blocks);
10284        }
10285
10286        let expected_reachability_graph =
10287            calculate_reachability(&section_to_block, &all_blocks, &data_sections_set, &exports, &relocations)?;
10288        if reachability_graph != expected_reachability_graph {
10289            if std::env::var("POLKAVM_LINKER_DUMP_REACHABILITY_GRAPH")
10290                .map(|value| value == "1")
10291                .unwrap_or(false)
10292            {
10293                let _ = std::fs::write("/tmp/reachability_graph_actual.txt", format!("{reachability_graph:#?}"));
10294                let _ = std::fs::write("/tmp/reachability_graph_expected.txt", format!("{expected_reachability_graph:#?}"));
10295            }
10296            panic!("internal error: inconsistent reachability after optimization; this is a bug, please report it!");
10297        }
10298    } else {
10299        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
10300            perform_nop_elimination(&mut all_blocks, current);
10301        }
10302
10303        reachability_graph = ReachabilityGraph::default();
10304        for current in (0..all_blocks.len()).map(BlockTarget::from_raw) {
10305            let reachability = reachability_graph.for_code.entry(current).or_default();
10306
10307            reachability.always_reachable = true;
10308            reachability.always_dynamically_reachable = true;
10309        }
10310
10311        for &section_index in sections_ro_data.iter().chain(sections_rw_data.iter()) {
10312            let reachability = reachability_graph.for_data.entry(section_index).or_default();
10313
10314            reachability.always_reachable = true;
10315            reachability.always_dynamically_reachable = true;
10316        }
10317
10318        for (export_index, export) in exports.iter().enumerate() {
10319            let Some(&block_target) = section_to_block.get(&export.location) else {
10320                return Err(ProgramFromElfError::other("export points to a non-block"));
10321            };
10322
10323            reachability_graph
10324                .for_code
10325                .entry(block_target)
10326                .or_default()
10327                .exports
10328                .push(export_index);
10329        }
10330
10331        used_blocks = (0..all_blocks.len()).map(BlockTarget::from_raw).collect();
10332        spill_fake_registers(
10333            section_regspill,
10334            &mut all_blocks,
10335            &mut reachability_graph,
10336            &imports,
10337            &used_blocks,
10338            &mut regspill_size,
10339            is_rv64,
10340        );
10341    }
10342
10343    elf.extend_section_to_at_least(section_regspill, regspill_size);
10344
10345    for &section_index in &sections_other {
10346        if reachability_graph.is_data_section_reachable(section_index) {
10347            return Err(ProgramFromElfError::other(format!(
10348                "unsupported section used in program graph: '{name}'",
10349                name = elf.section_by_index(section_index).name(),
10350            )));
10351        }
10352    }
10353
10354    log::debug!("Exports found: {}", exports.len());
10355
10356    {
10357        let mut count_dynamic = 0;
10358        for reachability in reachability_graph.for_code.values() {
10359            if reachability.is_dynamically_reachable() {
10360                count_dynamic += 1;
10361            }
10362        }
10363        log::debug!(
10364            "Blocks used: {}/{} ({} dynamically reachable, {} statically reachable)",
10365            reachability_graph.for_code.len(),
10366            all_blocks.len(),
10367            count_dynamic,
10368            reachability_graph.for_code.len() - count_dynamic
10369        );
10370    }
10371
10372    let section_got = elf.add_empty_data_section(".got");
10373    sections_ro_data.push(section_got);
10374    reachability_graph.mark_data_section_reachable(section_got);
10375
10376    let mut target_to_got_offset: HashMap<AnyTarget, u64> = HashMap::new();
10377    let mut got_size = 0;
10378
10379    let mut used_imports = HashSet::new();
10380    for block in &all_blocks {
10381        if !reachability_graph.is_code_reachable(block.target) {
10382            continue;
10383        }
10384
10385        for (_, instruction) in &block.ops {
10386            match instruction {
10387                BasicInst::LoadAddressIndirect { target, .. } => {
10388                    if target_to_got_offset.contains_key(target) {
10389                        continue;
10390                    }
10391
10392                    let offset = target_to_got_offset.len() as u64 * u64::from(bitness);
10393                    target_to_got_offset.insert(*target, offset);
10394                    got_size = offset + u64::from(bitness);
10395
10396                    let target = match target {
10397                        AnyTarget::Data(target) => *target,
10398                        AnyTarget::Code(target) => all_blocks[target.index()].source.begin(),
10399                    };
10400
10401                    relocations.insert(
10402                        SectionTarget {
10403                            section_index: section_got,
10404                            offset,
10405                        },
10406                        RelocationKind::Abs {
10407                            target,
10408                            size: bitness.into(),
10409                        },
10410                    );
10411                }
10412                BasicInst::Ecalli { nth_import } => {
10413                    used_imports.insert(*nth_import);
10414                }
10415                _ => {}
10416            }
10417        }
10418    }
10419
10420    elf.extend_section_to_at_least(section_got, got_size.try_into().expect("overflow"));
10421    check_imports_and_assign_indexes(&mut imports, &used_imports)?;
10422
10423    let mut base_address_for_section = HashMap::new();
10424    let sections_ro_data: Vec<_> = sections_ro_data
10425        .into_iter()
10426        .filter(|section_index| reachability_graph.is_data_section_reachable(*section_index))
10427        .collect();
10428
10429    let sections_rw_data: Vec<_> = sections_rw_data
10430        .into_iter()
10431        .filter(|section_index| reachability_graph.is_data_section_reachable(*section_index))
10432        .collect();
10433
10434    let memory_config = extract_memory_config(
10435        &elf,
10436        &sections_ro_data,
10437        &sections_rw_data,
10438        &sections_bss,
10439        &sections_min_stack_size,
10440        &mut base_address_for_section,
10441        config.min_stack_size,
10442    )?;
10443
10444    log::trace!("Memory configuration: {:#?}", memory_config);
10445
10446    let (jump_table, jump_target_for_block) = build_jump_table(all_blocks.len(), &used_blocks, &reachability_graph);
10447    let (code, mut block_offsets) = emit_code(
10448        &section_to_function_name,
10449        &imports,
10450        &base_address_for_section,
10451        section_got,
10452        &target_to_got_offset,
10453        &all_blocks,
10454        &used_blocks,
10455        &used_imports,
10456        &jump_target_for_block,
10457        matches!(config.opt_level, OptLevel::O2 | OptLevel::Oexperimental),
10458        is_rv64,
10459        memory_config.heap_base,
10460    )?;
10461
10462    let mut blocks_with_zero_cost_unlikely = HashSet::new();
10463
10464    assert_eq!(block_offsets.len(), used_blocks.len());
10465    block_offsets.push(code.len());
10466
10467    if isa.supports_opcode(Opcode::unlikely) && config.gas_cost_model_aware_optimizations {
10468        log::debug!("Performing gas cost model aware optimizations...");
10469        for (current, range) in used_blocks.iter().copied().zip(block_offsets.windows(2).map(|w| w[0]..w[1])) {
10470            use polkavm_common::program::ParsedInstruction;
10471            use polkavm_common::simulator::{CacheModel, Simulator};
10472            use polkavm_common::utils::{GasVisitorT, B64};
10473
10474            fn inst(instruction: Instruction) -> ParsedInstruction {
10475                ParsedInstruction {
10476                    kind: instruction,
10477                    offset: ProgramCounter(0),
10478                    next_offset: ProgramCounter(0),
10479                }
10480            }
10481
10482            let mut simulator = Simulator::<B64, ()>::new(&[], isa, CacheModel::L2Hit, ());
10483            simulator.set_force_branch_is_cheap(Some(false));
10484
10485            log::trace!("Simulating block {}..{}...", range.start, range.end);
10486            let mut fmt = InstructionFormat::default();
10487            fmt.is_64_bit = is_rv64;
10488
10489            for (_, instruction) in &code[range.clone()] {
10490                log::trace!("  {}", instruction.display(&fmt));
10491                inst(*instruction).visit_parsing(&mut simulator);
10492            }
10493            let cost_without_unlikely = simulator.take_block_cost().unwrap();
10494            log::trace!("Cost without unlikely: {cost_without_unlikely}");
10495
10496            inst(Instruction::unlikely).visit_parsing(&mut simulator);
10497            for (_, instruction) in &code[range] {
10498                inst(*instruction).visit_parsing(&mut simulator);
10499            }
10500            let cost_with_unlikely = simulator.take_block_cost().unwrap();
10501            log::trace!("Cost with unlikely: {cost_with_unlikely}");
10502
10503            if cost_with_unlikely == cost_without_unlikely {
10504                blocks_with_zero_cost_unlikely.insert(current);
10505            }
10506        }
10507    }
10508
10509    if isa.supports_opcode(Opcode::unlikely) {
10510        for &current in &used_blocks {
10511            if !blocks_with_zero_cost_unlikely.contains(&current) {
10512                continue;
10513            }
10514
10515            let Some(reachability) = reachability_graph.for_code.get(&current) else {
10516                continue;
10517            };
10518            if !reachability.is_only_statically_reachable() {
10519                continue;
10520            }
10521
10522            if !reachability
10523                .reachable_from
10524                .iter()
10525                .any(|source| matches!(all_blocks[source.index()].next.instruction, ControlInst::Branch { .. }))
10526            {
10527                continue;
10528            }
10529
10530            all_blocks[current.index()].is_unlikely = true;
10531        }
10532
10533        if !info_for_block.is_empty() {
10534            for &current in &used_blocks {
10535                let ControlInst::Branch {
10536                    target_true, target_false, ..
10537                } = all_blocks[current.index()].next.instruction
10538                else {
10539                    continue;
10540                };
10541
10542                let terminator_true = &info_for_block[target_true.index()].terminator;
10543                let terminator_false = &info_for_block[target_false.index()].terminator;
10544
10545                if terminator_true.contains_infinite_loop()
10546                    && !terminator_false.contains_infinite_loop()
10547                    && target_true.index() < target_false.index()
10548                    && !all_blocks[target_true.index()].is_unlikely
10549                {
10550                    if let Some(reachability) = reachability_graph.for_code.get(&target_false) {
10551                        if reachability.is_only_reachable_from(current) {
10552                            all_blocks[target_false.index()].is_unlikely = true;
10553                            continue;
10554                        }
10555                    }
10556                }
10557            }
10558        }
10559    }
10560
10561    let (code, _) = emit_code(
10562        &section_to_function_name,
10563        &imports,
10564        &base_address_for_section,
10565        section_got,
10566        &target_to_got_offset,
10567        &all_blocks,
10568        &used_blocks,
10569        &used_imports,
10570        &jump_target_for_block,
10571        matches!(config.opt_level, OptLevel::O2 | OptLevel::Oexperimental),
10572        is_rv64,
10573        memory_config.heap_base,
10574    )?;
10575
10576    {
10577        // Assign dummy base addresses to all other sections.
10578        //
10579        // This is mostly used for debug info.
10580        for &section_index in &sections_other {
10581            let address = elf.section_by_index(section_index).original_address();
10582            assert!(!reachability_graph.is_data_section_reachable(section_index));
10583            assert!(base_address_for_section.insert(section_index, address).is_none());
10584        }
10585    }
10586
10587    for (&relocation_target, &relocation) in &relocations {
10588        let section = elf.section_by_index(relocation_target.section_index);
10589        if !reachability_graph.is_data_section_reachable(relocation_target.section_index) {
10590            continue;
10591        }
10592
10593        log::trace!(
10594            "Applying relocation to '{}'[0x{:x}] {relocation_target}: {:?}",
10595            section.name(),
10596            relocation_target.offset,
10597            relocation
10598        );
10599
10600        fn write_generic(size: RelocationSize, data: &mut [u8], relative_address: u64, value: u64) -> Result<(), ProgramFromElfError> {
10601            match size {
10602                RelocationSize::U64 => write_u64(data, relative_address, value),
10603                RelocationSize::U32 => {
10604                    let Ok(value) = u32::try_from(value) else {
10605                        return Err(ProgramFromElfError::other(
10606                            "overflow when applying relocations: value doesn't fit in an u32",
10607                        ));
10608                    };
10609
10610                    write_u32(data, relative_address, value)
10611                }
10612                RelocationSize::U16 => {
10613                    let Ok(value) = u16::try_from(value) else {
10614                        return Err(ProgramFromElfError::other(
10615                            "overflow when applying relocations: value doesn't fit in an u16",
10616                        ));
10617                    };
10618
10619                    write_u16(data, relative_address, value)
10620                }
10621                RelocationSize::U8 => {
10622                    let Ok(value) = u8::try_from(value) else {
10623                        return Err(ProgramFromElfError::other(
10624                            "overflow when applying relocations: value doesn't fit in an u8",
10625                        ));
10626                    };
10627
10628                    data[relative_address as usize] = value;
10629                    Ok(())
10630                }
10631            }
10632        }
10633
10634        match relocation {
10635            RelocationKind::Offset { origin, target, size } => {
10636                // These relocations should only be used in debug info sections and RO data sections.
10637                if reachability_graph.is_data_section_reachable(section.index()) && !matches!(size, SizeRelocationSize::Generic(..)) {
10638                    return Err(ProgramFromElfError::other(format!(
10639                        "relocation was not expected in section '{name}': {relocation:?}",
10640                        name = section.name(),
10641                    )));
10642                }
10643
10644                let Some(&origin_section_address) = base_address_for_section.get(&origin.section_index) else {
10645                    return Err(ProgramFromElfError::other(format!(
10646                        "internal error: relocation in '{name}' ({relocation_target}) refers to an origin section that doesn't have a base address assigned: origin = '{origin_name}' ({origin}), target = '{target_name}' ({target}), size = {size:?}",
10647                        name = section.name(),
10648                        origin_name = elf.section_by_index(origin.section_index).name(),
10649                        target_name = elf.section_by_index(target.section_index).name(),
10650                    )));
10651                };
10652
10653                let Some(&target_section_address) = base_address_for_section.get(&target.section_index) else {
10654                    return Err(ProgramFromElfError::other(format!(
10655                        "internal error: relocation in '{name}' ({relocation_target}) refers to a target section that doesn't have a base address assigned: origin = '{origin_name}' ({origin}), target = '{target_name}' ({target}), size = {size:?}",
10656                        name = section.name(),
10657                        origin_name = elf.section_by_index(origin.section_index).name(),
10658                        target_name = elf.section_by_index(target.section_index).name(),
10659                    )));
10660                };
10661
10662                let range = origin_section_address.wrapping_add(origin.offset)..target_section_address.wrapping_add(target.offset);
10663                let data = elf.section_data_mut(relocation_target.section_index);
10664                let mut value = range.end.wrapping_sub(range.start);
10665                match size {
10666                    SizeRelocationSize::Uleb128 => {
10667                        overwrite_uleb128(data, relocation_target.offset as usize, value)?;
10668                    }
10669                    SizeRelocationSize::SixBits => {
10670                        let mask = 0b00111111;
10671                        if value > mask {
10672                            return Err(ProgramFromElfError::other("six bit relocation overflow"));
10673                        }
10674
10675                        let output = (u64::from(read_u8(data, relocation_target.offset)?) & (!mask)) | (value & mask);
10676                        data[relocation_target.offset as usize] = output as u8;
10677                    }
10678                    SizeRelocationSize::Generic(size) => {
10679                        if range.end < range.start {
10680                            match size {
10681                                RelocationSize::U8 => {
10682                                    if let Ok(new_value) = cast(value).to_signed().try_into() {
10683                                        let new_value: i8 = new_value;
10684                                        value = cast(cast(new_value).to_unsigned()).to_u64();
10685                                    }
10686                                }
10687                                RelocationSize::U16 => {
10688                                    if let Ok(new_value) = cast(value).to_signed().try_into() {
10689                                        let new_value: i16 = new_value;
10690                                        value = cast(cast(new_value).to_unsigned()).to_u64();
10691                                    }
10692                                }
10693                                RelocationSize::U32 => {
10694                                    if let Ok(new_value) = cast(value).to_signed().try_into() {
10695                                        let new_value: i32 = new_value;
10696                                        value = cast(cast(new_value).to_unsigned()).to_u64();
10697                                    }
10698                                }
10699                                RelocationSize::U64 => {}
10700                            }
10701                        }
10702
10703                        write_generic(size, data, relocation_target.offset, value)?;
10704                    }
10705                }
10706            }
10707            RelocationKind::Abs { target, size } => {
10708                if let Some(&block_target) = section_to_block.get(&target) {
10709                    let Some(jump_target) = jump_target_for_block[block_target.index()] else {
10710                        if !reachability_graph.is_data_section_reachable(relocation_target.section_index) {
10711                            // Most likely debug info for something that was stripped out.
10712                            let data = elf.section_data_mut(relocation_target.section_index);
10713                            write_generic(size, data, relocation_target.offset, 0)?;
10714                            continue;
10715                        }
10716
10717                        return Err(ProgramFromElfError::other(format!(
10718                            "absolute relocation in section '{location_name}' targets section '{target_name}'[0x{target_offset:x}] which has no associated basic block",
10719                            location_name = elf.section_by_index(relocation_target.section_index).name(),
10720                            target_name = elf.section_by_index(target.section_index).name(),
10721                            target_offset = target.offset,
10722                        )));
10723                    };
10724
10725                    let jump_target = jump_target.dynamic_target.expect("missing jump target for address");
10726                    let Some(jump_target) = jump_target.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
10727                        return Err(ProgramFromElfError::other("overflow when applying a jump target relocation"));
10728                    };
10729
10730                    let data = elf.section_data_mut(relocation_target.section_index);
10731                    write_generic(size, data, relocation_target.offset, jump_target.into())?;
10732                } else {
10733                    let Some(section_base) = base_address_for_section.get(&target.section_index) else {
10734                        if !reachability_graph.is_data_section_reachable(relocation_target.section_index) {
10735                            let data = elf.section_data_mut(relocation_target.section_index);
10736                            write_generic(size, data, relocation_target.offset, 0)?;
10737                            continue;
10738                        }
10739
10740                        return Err(ProgramFromElfError::other(format!(
10741                            "absolute relocation in section '{location_name}' targets section '{target_name}'[0x{target_offset:x}] which has no relocated base address assigned",
10742                            location_name = elf.section_by_index(relocation_target.section_index).name(),
10743                            target_name = elf.section_by_index(target.section_index).name(),
10744                            target_offset = target.offset,
10745                        )));
10746                    };
10747
10748                    let Some(value) = section_base.checked_add(target.offset) else {
10749                        return Err(ProgramFromElfError::other("overflow when applying an absolute relocation"));
10750                    };
10751
10752                    let data = elf.section_data_mut(relocation_target.section_index);
10753                    write_generic(size, data, relocation_target.offset, value)?;
10754                }
10755            }
10756            RelocationKind::JumpTable { target_code, target_base } => {
10757                let Some(&block_target) = section_to_block.get(&target_code) else {
10758                    return Err(ProgramFromElfError::other(
10759                        "jump table relocation doesn't refers to a start of a basic block",
10760                    ));
10761                };
10762
10763                let Some(jump_target) = jump_target_for_block[block_target.index()] else {
10764                    return Err(ProgramFromElfError::other(
10765                        "no jump target for block was found when applying a jump table relocation",
10766                    ));
10767                };
10768
10769                let Some(section_base) = base_address_for_section.get(&target_base.section_index) else {
10770                    return Err(ProgramFromElfError::other(
10771                        "no base address for section when applying a jump table relocation",
10772                    ));
10773                };
10774
10775                let Some(base_address) = section_base.checked_add(target_base.offset) else {
10776                    return Err(ProgramFromElfError::other(
10777                        "overflow when applying a jump table relocation: section base and offset cannot be added together",
10778                    ));
10779                };
10780
10781                let Ok(base_address) = u32::try_from(base_address) else {
10782                    return Err(ProgramFromElfError::other(
10783                        "overflow when applying a jump table relocation: base address doesn't fit in a u32",
10784                    ));
10785                };
10786
10787                let jump_target = jump_target.dynamic_target.expect("missing jump target for address");
10788                let Some(jump_target) = jump_target.checked_mul(VM_CODE_ADDRESS_ALIGNMENT) else {
10789                    return Err(ProgramFromElfError::other(
10790                        "overflow when applying a jump table relocation: jump target is too big",
10791                    ));
10792                };
10793
10794                let value = jump_target.wrapping_sub(base_address);
10795                let data = elf.section_data_mut(relocation_target.section_index);
10796                write_u32(data, relocation_target.offset, value)?;
10797            }
10798        }
10799    }
10800
10801    let mut location_map: HashMap<SectionTarget, Arc<[Location]>> = HashMap::new();
10802    if !config.strip {
10803        let mut string_cache = crate::utils::StringCache::default();
10804        let dwarf_info = crate::dwarf::load_dwarf(&mut string_cache, &elf, &relocations, &section_map)?;
10805        location_map = dwarf_info.location_map;
10806
10807        // If there is no DWARF info present try to use the symbol table as a fallback.
10808        for (source, name) in parse_function_symbols(&elf)? {
10809            if location_map.contains_key(&source.begin()) {
10810                continue;
10811            }
10812
10813            let (namespace, function_name) = split_function_name(&name);
10814            let namespace = if namespace.is_empty() {
10815                None
10816            } else {
10817                Some(string_cache.dedup(&namespace))
10818            };
10819
10820            let location = Location {
10821                kind: FrameKind::Enter,
10822                namespace,
10823                function_name: Some(string_cache.dedup(&function_name)),
10824                source_code_location: None,
10825            };
10826
10827            let location_stack: Arc<[Location]> = vec![location].into();
10828            for target in source.iter() {
10829                location_map.insert(target, Arc::clone(&location_stack));
10830            }
10831        }
10832    }
10833
10834    log::trace!("Instruction count: {}", code.len());
10835
10836    let mut builder = ProgramBlobBuilder::new(isa);
10837    builder.set_ro_data_size(memory_config.ro_data_size);
10838    builder.set_rw_data_size(memory_config.rw_data_size);
10839    builder.set_stack_size(memory_config.min_stack_size);
10840
10841    let [ro_data, rw_data] = {
10842        [memory_config.ro_data, memory_config.rw_data].map(|ranges| {
10843            let mut buffer = Vec::new();
10844            for range in ranges {
10845                match range {
10846                    DataRef::Section { section_index, range } => {
10847                        let slice = &elf.section_by_index(section_index).data()[range];
10848                        buffer.extend_from_slice(slice);
10849                    }
10850                    DataRef::Padding(bytes) => {
10851                        let new_size = buffer.len() + bytes;
10852                        buffer.resize(new_size, 0);
10853                    }
10854                }
10855            }
10856            buffer
10857        })
10858    };
10859
10860    builder.set_ro_data(ro_data);
10861    builder.set_rw_data(rw_data);
10862
10863    {
10864        let mut sorted_imports = imports.clone();
10865        sorted_imports.sort_by(|a, b| {
10866            a.metadata
10867                .index
10868                .cmp(&b.metadata.index)
10869                .then_with(|| a.metadata.symbol.cmp(&b.metadata.symbol))
10870        });
10871
10872        let mut next_index = 0;
10873        for import in sorted_imports {
10874            let Some(index) = import.index else {
10875                continue;
10876            };
10877
10878            assert_eq!(index, next_index);
10879            next_index += 1;
10880
10881            builder.add_import(&import.metadata.symbol);
10882        }
10883    }
10884
10885    let mut export_count = 0;
10886    for current in used_blocks {
10887        for &export_index in &reachability_graph.for_code.get(&current).unwrap().exports {
10888            let export = &exports[export_index];
10889            let jump_target = jump_target_for_block[current.index()]
10890                .expect("internal error: export metadata points to a block without a jump target assigned");
10891
10892            builder.add_export_by_basic_block(jump_target.static_target, &export.metadata.symbol);
10893            export_count += 1;
10894        }
10895    }
10896    assert_eq!(export_count, exports.len());
10897
10898    let mut locations_for_instruction: Vec<Option<Arc<[Location]>>> = Vec::with_capacity(code.len());
10899    let mut raw_code = Vec::with_capacity(code.len());
10900
10901    for (nth_inst, (source_stack, inst)) in code.into_iter().enumerate() {
10902        raw_code.push(inst);
10903
10904        let mut function_name = None;
10905        if !config.strip {
10906            // Two or more addresses can point to the same instruction (e.g. in case of macro op fusion).
10907            // Two or more instructions can also have the same address (e.g. in case of jump targets).
10908
10909            // TODO: Use a smallvec.
10910            let mut list = Vec::new();
10911            for source in source_stack.as_slice() {
10912                for offset in (source.offset_range.start..source.offset_range.end).step_by(2) {
10913                    let target = SectionTarget {
10914                        section_index: source.section_index,
10915                        offset,
10916                    };
10917
10918                    if let Some(locations) = location_map.get(&target) {
10919                        if let Some(last) = list.last() {
10920                            if locations == last {
10921                                // If we inlined a basic block from the same function do not repeat the same location.
10922                                break;
10923                            }
10924                        } else {
10925                            function_name = locations[0].function_name.as_deref();
10926                        }
10927
10928                        list.push(Arc::clone(locations));
10929                        break;
10930                    }
10931                }
10932
10933                if list.is_empty() {
10934                    // If the toplevel source doesn't have a location don't try the lower ones.
10935                    break;
10936                }
10937            }
10938
10939            if list.is_empty() {
10940                locations_for_instruction.push(None);
10941            } else if list.len() == 1 {
10942                locations_for_instruction.push(list.into_iter().next())
10943            } else {
10944                let mut new_list = Vec::new();
10945                for sublist in list {
10946                    new_list.extend(sublist.iter().cloned());
10947                }
10948
10949                locations_for_instruction.push(Some(new_list.into()));
10950            }
10951        }
10952
10953        log::trace!(
10954            "Code: 0x{source_address:x} [{function_name}] -> {source_stack} -> #{nth_inst}: {inst}",
10955            source_address = {
10956                elf.section_by_index(source_stack.top().section_index)
10957                    .original_address()
10958                    .wrapping_add(source_stack.top().offset_range.start)
10959            },
10960            function_name = function_name.unwrap_or("")
10961        );
10962    }
10963
10964    for symbol in &config.dispatch_table {
10965        builder.add_dispatch_table_entry(symbol.clone());
10966    }
10967
10968    builder.set_code(&raw_code, &jump_table);
10969
10970    let mut offsets = Vec::new();
10971    if !config.strip {
10972        let blob = ProgramBlob::parse(builder.to_vec().map_err(ProgramFromElfError::other)?.into())?;
10973        offsets = blob
10974            .instructions()
10975            .skip(config.dispatch_table.len())
10976            .map(|instruction| (instruction.offset, instruction.next_offset))
10977            .collect();
10978        assert_eq!(offsets.len(), locations_for_instruction.len());
10979
10980        emit_debug_info(&mut builder, &locations_for_instruction, &offsets);
10981    }
10982
10983    let raw_blob = builder.to_vec().map_err(ProgramFromElfError::other)?;
10984
10985    log::debug!("Built a program of {} bytes", raw_blob.len());
10986    let blob = ProgramBlob::parse(raw_blob[..].into())?;
10987
10988    // Sanity check that our debug info was properly emitted and can be parsed.
10989    if cfg!(debug_assertions) && !config.strip {
10990        'outer: for (nth_instruction, locations) in locations_for_instruction.iter().enumerate() {
10991            let (program_counter, _) = offsets[nth_instruction];
10992            let line_program = blob.get_debug_line_program_at(program_counter).unwrap();
10993            let Some(locations) = locations else {
10994                assert!(line_program.is_none());
10995                continue;
10996            };
10997
10998            let mut line_program = line_program.unwrap();
10999            while let Some(region_info) = line_program.run().unwrap() {
11000                if !region_info.instruction_range().contains(&program_counter) {
11001                    continue;
11002                }
11003
11004                assert!(region_info.frames().len() <= locations.len());
11005                for (actual, expected) in region_info.frames().zip(locations.iter()) {
11006                    assert_eq!(actual.kind(), expected.kind);
11007                    assert_eq!(actual.namespace().unwrap(), expected.namespace.as_deref());
11008                    assert_eq!(actual.function_name_without_namespace().unwrap(), expected.function_name.as_deref());
11009                    assert_eq!(
11010                        actual.path().unwrap().map(Cow::Borrowed),
11011                        expected
11012                            .source_code_location
11013                            .as_ref()
11014                            .map(|location| simplify_path(location.path()))
11015                    );
11016                    assert_eq!(
11017                        actual.line(),
11018                        expected
11019                            .source_code_location
11020                            .as_ref()
11021                            .and_then(|location| location.line())
11022                            .and_then(|line| if line != 0 { Some(line) } else { None })
11023                    );
11024                    assert_eq!(
11025                        actual.column(),
11026                        expected
11027                            .source_code_location
11028                            .as_ref()
11029                            .and_then(|location| location.column())
11030                            .and_then(|column| if column != 0 { Some(column) } else { None })
11031                    );
11032                }
11033
11034                continue 'outer;
11035            }
11036
11037            panic!("internal error: region not found for instruction");
11038        }
11039    }
11040
11041    Ok(raw_blob)
11042}
11043
/// Shortens machine-specific paths for debug info: a `/home/<user>` prefix is
/// replaced with `~`, keeping the path independent of the build machine's user name.
/// All other paths are returned unchanged (and unallocated).
fn simplify_path(path: &str) -> Cow<str> {
    // TODO: Sanitize macOS and Windows paths.
    if let Some(rest) = path.strip_prefix("/home/") {
        if let Some(slash) = rest.find('/') {
            return Cow::Owned(format!("~{}", &rest[slash..]));
        }
    }

    Cow::Borrowed(path)
}
11054
11055fn emit_debug_info(
11056    builder: &mut ProgramBlobBuilder,
11057    locations_for_instruction: &[Option<Arc<[Location]>>],
11058    offsets: &[(ProgramCounter, ProgramCounter)],
11059) {
11060    #[derive(Default)]
11061    struct DebugStringsBuilder<'a> {
11062        map: HashMap<Cow<'a, str>, u32>,
11063        section: Vec<u8>,
11064        write_protected: bool,
11065    }
11066
11067    impl<'a> DebugStringsBuilder<'a> {
11068        fn dedup_cow(&mut self, s: Cow<'a, str>) -> u32 {
11069            if let Some(offset) = self.map.get(&s) {
11070                return *offset;
11071            }
11072
11073            assert!(!self.write_protected);
11074
11075            let offset = self.section.len();
11076            let mut buffer = [0xff_u8; varint::MAX_VARINT_LENGTH];
11077            let length = varint::write_varint(s.len().try_into().expect("debug string length overflow"), &mut buffer);
11078            self.section.extend_from_slice(&buffer[..length]);
11079            self.section.extend_from_slice(s.as_bytes());
11080            let offset: u32 = offset.try_into().expect("debug string section length overflow");
11081            self.map.insert(s, offset);
11082            offset
11083        }
11084
11085        fn dedup(&mut self, s: &'a str) -> u32 {
11086            self.dedup_cow(s.into())
11087        }
11088    }
11089
11090    let mut dbg_strings = DebugStringsBuilder::default();
11091    let empty_string_id = dbg_strings.dedup("");
11092
11093    struct Group<'a> {
11094        namespace: Option<Arc<str>>,
11095        function_name: Option<Arc<str>>,
11096        path: Option<Cow<'a, str>>,
11097        instruction_position: usize,
11098        instruction_count: usize,
11099        program_counter_start: ProgramCounter,
11100        program_counter_end: ProgramCounter,
11101    }
11102
11103    impl<'a> Group<'a> {
11104        fn key(&self) -> (Option<&str>, Option<&str>, Option<&str>) {
11105            (self.namespace.as_deref(), self.function_name.as_deref(), self.path.as_deref())
11106        }
11107    }
11108
11109    let mut groups: Vec<Group> = Vec::new();
11110    for (instruction_position, locations) in locations_for_instruction.iter().enumerate() {
11111        let group = if let Some(locations) = locations {
11112            for location in locations.iter() {
11113                if let Some(ref namespace) = location.namespace {
11114                    dbg_strings.dedup(namespace);
11115                }
11116
11117                if let Some(ref name) = location.function_name {
11118                    dbg_strings.dedup(name);
11119                }
11120
11121                if let Some(ref location) = location.source_code_location {
11122                    dbg_strings.dedup_cow(simplify_path(location.path()));
11123                }
11124            }
11125
11126            let location = &locations[0];
11127            Group {
11128                namespace: location.namespace.clone(),
11129                function_name: location.function_name.clone(),
11130                path: location.source_code_location.as_ref().map(|target| simplify_path(target.path())),
11131                instruction_position,
11132                instruction_count: 1,
11133                program_counter_start: offsets[instruction_position].0,
11134                program_counter_end: offsets[instruction_position].1,
11135            }
11136        } else {
11137            Group {
11138                namespace: None,
11139                function_name: None,
11140                path: None,
11141                instruction_position,
11142                instruction_count: 1,
11143                program_counter_start: offsets[instruction_position].0,
11144                program_counter_end: offsets[instruction_position].1,
11145            }
11146        };
11147
11148        if let Some(last_group) = groups.last_mut() {
11149            if last_group.key() == group.key() {
11150                assert_eq!(last_group.instruction_position + last_group.instruction_count, instruction_position);
11151                last_group.instruction_count += 1;
11152                last_group.program_counter_end = group.program_counter_end;
11153                continue;
11154            }
11155        }
11156
11157        groups.push(group);
11158    }
11159
11160    groups.retain(|group| group.function_name.is_some() || group.path.is_some());
11161
11162    log::trace!("Location groups: {}", groups.len());
11163    dbg_strings.write_protected = true;
11164
11165    let mut section_line_programs = Vec::new();
11166    let mut info_offsets = Vec::with_capacity(groups.len());
11167    {
11168        let mut writer = Writer::new(&mut section_line_programs);
11169        let writer = &mut writer;
11170
11171        let offset_base = writer.len();
11172        writer.push_byte(program::VERSION_DEBUG_LINE_PROGRAM_V1);
11173        for group in &groups {
11174            let info_offset: u32 = (writer.len() - offset_base).try_into().expect("function info offset overflow");
11175            info_offsets.push(info_offset);
11176
            /// One frame of the location stack as tracked by the line program
            /// encoder below; `None` fields have not been set for this frame yet.
            #[derive(Default)]
            struct LineProgramFrame {
                kind: Option<FrameKind>,
                namespace: Option<Arc<str>>,
                function_name: Option<Arc<str>>,
                path: Option<Arc<str>>,
                line: Option<u32>,
                column: Option<u32>,
            }
11186
            /// Encoder state for one group's line program, mirroring what a
            /// decoder replaying the emitted ops would reconstruct.
            #[derive(Default)]
            struct LineProgramState {
                // One cached frame per stack depth level; grown lazily with defaults.
                stack: Vec<LineProgramFrame>,
                // Depth last communicated via a SetStackDepth (or fused Finish*) op.
                stack_depth: usize,
                // Depth last communicated via SetMutationDepth; subsequent Set* ops
                // apply to the frame at this depth.
                mutation_depth: usize,

                // Pending count to be emitted with the next Finish*/FinishMultiple* op;
                // accumulated so consecutive instructions can share one op.
                queued_count: u32,
            }
11195
11196            impl LineProgramState {
11197                fn flush_if_any_are_queued(&mut self, writer: &mut Writer) {
11198                    if self.queued_count == 0 {
11199                        return;
11200                    }
11201
11202                    if self.queued_count == 1 {
11203                        writer.push_byte(LineProgramOp::FinishInstruction as u8);
11204                    } else {
11205                        writer.push_byte(LineProgramOp::FinishMultipleInstructions as u8);
11206                        writer.push_varint(self.queued_count);
11207                    }
11208
11209                    self.queued_count = 0;
11210                }
11211
11212                fn set_mutation_depth(&mut self, writer: &mut Writer, depth: usize) {
11213                    self.flush_if_any_are_queued(writer);
11214
11215                    if depth == self.mutation_depth {
11216                        return;
11217                    }
11218
11219                    writer.push_byte(LineProgramOp::SetMutationDepth as u8);
11220                    writer.push_varint(depth as u32);
11221                    self.mutation_depth = depth;
11222                }
11223
11224                fn set_stack_depth(&mut self, writer: &mut Writer, depth: usize) {
11225                    if self.stack_depth == depth {
11226                        return;
11227                    }
11228
11229                    while depth > self.stack.len() {
11230                        self.stack.push(LineProgramFrame::default());
11231                    }
11232
11233                    self.flush_if_any_are_queued(writer);
11234
11235                    writer.push_byte(LineProgramOp::SetStackDepth as u8);
11236                    writer.push_varint(depth as u32);
11237                    self.stack_depth = depth;
11238                }
11239
11240                fn finish_instruction(&mut self, writer: &mut Writer, next_depth: usize, instruction_length: u32) {
11241                    self.queued_count += instruction_length;
11242
11243                    enum Direction {
11244                        GoDown,
11245                        GoUp,
11246                    }
11247
11248                    let dir = if next_depth == self.stack_depth + 1 {
11249                        Direction::GoDown
11250                    } else if next_depth + 1 == self.stack_depth {
11251                        Direction::GoUp
11252                    } else {
11253                        return;
11254                    };
11255
11256                    while next_depth > self.stack.len() {
11257                        self.stack.push(LineProgramFrame::default());
11258                    }
11259
11260                    match (self.queued_count == 1, dir) {
11261                        (true, Direction::GoDown) => {
11262                            writer.push_byte(LineProgramOp::FinishInstructionAndIncrementStackDepth as u8);
11263                        }
11264                        (false, Direction::GoDown) => {
11265                            writer.push_byte(LineProgramOp::FinishMultipleInstructionsAndIncrementStackDepth as u8);
11266                            writer.push_varint(self.queued_count);
11267                        }
11268                        (true, Direction::GoUp) => {
11269                            writer.push_byte(LineProgramOp::FinishInstructionAndDecrementStackDepth as u8);
11270                        }
11271                        (false, Direction::GoUp) => {
11272                            writer.push_byte(LineProgramOp::FinishMultipleInstructionsAndDecrementStackDepth as u8);
11273                            writer.push_varint(self.queued_count);
11274                        }
11275                    }
11276
11277                    self.stack_depth = next_depth;
11278                    self.queued_count = 0;
11279                }
11280            }
11281
11282            let mut state = LineProgramState::default();
11283            for nth_instruction in group.instruction_position..group.instruction_position + group.instruction_count {
11284                let locations = locations_for_instruction[nth_instruction].as_ref().unwrap();
11285                state.set_stack_depth(writer, locations.len());
11286
11287                for (depth, location) in locations.iter().enumerate() {
11288                    let new_path = location
11289                        .source_code_location
11290                        .as_ref()
11291                        .map(|location| simplify_path(location.path()));
11292                    let new_line = location.source_code_location.as_ref().and_then(|location| location.line());
11293                    let new_column = location.source_code_location.as_ref().and_then(|location| location.column());
11294
11295                    let changed_kind = state.stack[depth].kind != Some(location.kind);
11296                    let changed_namespace = state.stack[depth].namespace != location.namespace;
11297                    let changed_function_name = state.stack[depth].function_name != location.function_name;
11298                    let changed_path = state.stack[depth].path.as_deref().map(Cow::Borrowed) != new_path;
11299                    let changed_line = state.stack[depth].line != new_line;
11300                    let changed_column = state.stack[depth].column != new_column;
11301
11302                    if changed_kind {
11303                        state.set_mutation_depth(writer, depth);
11304                        state.stack[depth].kind = Some(location.kind);
11305                        let kind = match location.kind {
11306                            FrameKind::Enter => LineProgramOp::SetKindEnter,
11307                            FrameKind::Call => LineProgramOp::SetKindCall,
11308                            FrameKind::Line => LineProgramOp::SetKindLine,
11309                        };
11310                        writer.push_byte(kind as u8);
11311                    }
11312
11313                    if changed_namespace {
11314                        state.set_mutation_depth(writer, depth);
11315                        writer.push_byte(LineProgramOp::SetNamespace as u8);
11316                        state.stack[depth].namespace = location.namespace.clone();
11317
11318                        let namespace_offset = location
11319                            .namespace
11320                            .as_ref()
11321                            .map_or(empty_string_id, |string| dbg_strings.dedup(string));
11322                        writer.push_varint(namespace_offset);
11323                    }
11324
11325                    if changed_function_name {
11326                        state.set_mutation_depth(writer, depth);
11327                        writer.push_byte(LineProgramOp::SetFunctionName as u8);
11328                        state.stack[depth].function_name = location.function_name.clone();
11329
11330                        let function_name_offset = location
11331                            .function_name
11332                            .as_ref()
11333                            .map_or(empty_string_id, |string| dbg_strings.dedup(string));
11334                        writer.push_varint(function_name_offset);
11335                    }
11336
11337                    if changed_path {
11338                        state.set_mutation_depth(writer, depth);
11339                        writer.push_byte(LineProgramOp::SetPath as u8);
11340                        state.stack[depth].path =
11341                            location
11342                                .source_code_location
11343                                .as_ref()
11344                                .map(|location| match simplify_path(location.path()) {
11345                                    Cow::Borrowed(_) => Arc::clone(location.path()),
11346                                    Cow::Owned(path) => path.into(),
11347                                });
11348
11349                        let path_offset = location
11350                            .source_code_location
11351                            .as_ref()
11352                            .map_or(empty_string_id, |location| dbg_strings.dedup_cow(simplify_path(location.path())));
11353                        writer.push_varint(path_offset);
11354                    }
11355
11356                    if changed_line {
11357                        state.set_mutation_depth(writer, depth);
11358                        match (state.stack[depth].line, new_line) {
11359                            (Some(old_value), Some(new_value)) if old_value + 1 == new_value => {
11360                                writer.push_byte(LineProgramOp::IncrementLine as u8);
11361                            }
11362                            (Some(old_value), Some(new_value)) if new_value > old_value => {
11363                                writer.push_byte(LineProgramOp::AddLine as u8);
11364                                writer.push_varint(new_value - old_value);
11365                            }
11366                            (Some(old_value), Some(new_value)) if new_value < old_value => {
11367                                writer.push_byte(LineProgramOp::SubLine as u8);
11368                                writer.push_varint(old_value - new_value);
11369                            }
11370                            _ => {
11371                                writer.push_byte(LineProgramOp::SetLine as u8);
11372                                writer.push_varint(new_line.unwrap_or(0));
11373                            }
11374                        }
11375                        state.stack[depth].line = new_line;
11376                    }
11377
11378                    if changed_column {
11379                        state.set_mutation_depth(writer, depth);
11380                        writer.push_byte(LineProgramOp::SetColumn as u8);
11381                        state.stack[depth].column = new_column;
11382                        writer.push_varint(new_column.unwrap_or(0));
11383                    }
11384                }
11385
11386                let next_depth = locations_for_instruction
11387                    .get(nth_instruction + 1)
11388                    .and_then(|next_locations| next_locations.as_ref().map(|xs| xs.len()))
11389                    .unwrap_or(0);
11390                state.finish_instruction(writer, next_depth, (offsets[nth_instruction].1).0 - (offsets[nth_instruction].0).0);
11391            }
11392
11393            state.flush_if_any_are_queued(writer);
11394            writer.push_byte(LineProgramOp::FinishProgram as u8);
11395        }
11396    }
11397
11398    assert_eq!(info_offsets.len(), groups.len());
11399
11400    let mut section_line_program_ranges = Vec::new();
11401    {
11402        let mut writer = Writer::new(&mut section_line_program_ranges);
11403        for (group, info_offset) in groups.iter().zip(info_offsets.into_iter()) {
11404            writer.push_u32(group.program_counter_start.0);
11405            writer.push_u32(group.program_counter_end.0);
11406            writer.push_u32(info_offset);
11407        }
11408    }
11409
11410    builder.add_custom_section(program::SECTION_OPT_DEBUG_STRINGS, dbg_strings.section);
11411    builder.add_custom_section(program::SECTION_OPT_DEBUG_LINE_PROGRAMS, section_line_programs);
11412    builder.add_custom_section(program::SECTION_OPT_DEBUG_LINE_PROGRAM_RANGES, section_line_program_ranges);
11413}