1#![allow(
12 clippy::cast_possible_truncation,
13 clippy::cast_possible_wrap,
14 clippy::cast_sign_loss
15)]
16
17use std::collections::{HashMap, HashSet};
18
19use inkwell::IntPredicate;
20use inkwell::basic_block::BasicBlock;
21use inkwell::values::{
22 AnyValue, AsValueRef, BasicValueEnum, FunctionValue, InstructionValue, IntValue, PhiValue,
23};
24
25use crate::pvm::Instruction;
26use crate::{Error, Result};
27
28use crate::abi::{FRAME_HEADER_SIZE, STACK_PTR_REG};
29use crate::translate::OptimizationFlags;
30
31use super::regalloc::RegAllocResult;
32
/// Module-wide, read-only context shared by all function lowerings.
///
/// Field meanings are inferred from names/usage elsewhere in the crate —
/// NOTE(review): confirm against the translate/abi modules.
pub struct LoweringContext {
    /// Base address of linear Wasm memory in PVM address space — TODO confirm.
    pub wasm_memory_base: i32,
    pub num_globals: usize,
    /// Base address for parameters spilled past the register-passing limit.
    pub param_overflow_base: i32,
    /// Per-function `(num_params, has_return)` — presumably; verify at call sites.
    pub function_signatures: Vec<(usize, bool)>,
    pub type_signatures: Vec<(usize, usize)>,
    /// Indirect-call table: table index -> function index.
    pub function_table: Vec<u32>,
    pub num_imported_funcs: usize,
    pub imported_func_names: Vec<String>,
    pub initial_memory_pages: u32,
    pub max_memory_pages: u32,
    pub stack_size: u32,
    /// Data-segment id -> placed offset.
    pub data_segment_offsets: HashMap<u32, u32>,
    /// Data-segment id -> byte length.
    pub data_segment_lengths: HashMap<u32, u32>,
    /// Data-segment id -> address where its length is stored (for `data.drop`-style ops) — TODO confirm.
    pub data_segment_length_addrs: HashMap<u32, i32>,
    /// Optional remapping of Wasm imports to host actions.
    pub wasm_import_map: Option<HashMap<String, crate::translate::ImportAction>>,
    /// Which optimization passes are enabled for this translation.
    pub optimizations: OptimizationFlags,
}
59
/// The result of lowering one LLVM function to PVM instructions.
pub struct LlvmFunctionTranslation {
    /// Lowered instruction stream for this function.
    pub instructions: Vec<Instruction>,
    /// Direct-call sites still needing their final target offsets patched.
    pub call_fixups: Vec<LlvmCallFixup>,
    /// Indirect-call sites still needing patching.
    pub indirect_call_fixups: Vec<LlvmIndirectCallFixup>,
    /// How many call-return jump-table entries this function consumed.
    pub num_call_returns: usize,
    /// Diagnostics gathered while lowering.
    pub lowering_stats: FunctionLoweringStats,
}
70
/// Per-function statistics collected during lowering, for diagnostics/logging.
#[derive(Debug, Clone, Default)]
pub struct FunctionLoweringStats {
    /// Final stack-frame size in bytes.
    pub frame_size: i32,
    /// True when the function makes no (real) calls.
    pub is_leaf: bool,
    /// Instruction count before dead-store elimination.
    pub pre_dse_instructions: usize,
    /// Instruction count before peephole optimization.
    pub pre_peephole_instructions: usize,
    /// Values considered by the register allocator.
    pub regalloc_total_values: usize,
    /// Values actually assigned a register.
    pub regalloc_allocated_values: usize,
    /// Physical registers the allocator handed out.
    pub regalloc_registers_used: Vec<u8>,
    /// Why register allocation was skipped, if it was.
    pub regalloc_skipped_reason: Option<&'static str>,
    // The counters below mirror `RegAllocUsageStats`.
    pub regalloc_load_hits: usize,
    pub regalloc_load_reloads: usize,
    pub regalloc_load_moves: usize,
    pub regalloc_store_hits: usize,
    pub regalloc_store_moves: usize,
}
93
/// A direct call site whose jump target must be patched once the final
/// offset of `target_func` is known.
#[derive(Debug, Clone)]
pub struct LlvmCallFixup {
    /// Index of the instruction that loads the return address.
    pub return_addr_instr: usize,
    /// Index of the jump instruction to patch.
    pub jump_instr: usize,
    /// Callee's function index.
    pub target_func: u32,
}
100
/// An indirect call site whose instructions must be patched during linking.
#[derive(Debug, Clone)]
pub struct LlvmIndirectCallFixup {
    /// Index of the instruction that loads the return address.
    pub return_addr_instr: usize,
    /// Index of the indirect-jump instruction.
    pub jump_ind_instr: usize,
}
108
/// Feature switches and layout constants for a [`PvmEmitter`].
///
/// Each `*_enabled` flag gates one optimization; they are independent
/// toggles, hence the bools (not a state machine).
#[allow(clippy::struct_excessive_bools)]
pub struct EmitterConfig {
    /// Base address of linear Wasm memory — TODO confirm against abi module.
    pub wasm_memory_base: i32,

    /// Base address for overflow (stack-passed) call parameters.
    pub param_overflow_base: i32,

    /// Track which register holds which stack slot to skip redundant loads.
    pub register_cache_enabled: bool,

    /// Track known constants per register to skip redundant `LoadImm`s.
    pub constant_propagation_enabled: bool,

    /// Fuse an `icmp` into the following conditional branch.
    pub icmp_fusion_enabled: bool,

    /// Only save/restore callee-saved registers a function actually uses.
    pub shrink_wrap_enabled: bool,

    /// Let register-cache state survive into single-predecessor blocks.
    pub cross_block_cache_enabled: bool,

    /// Enable the slot-to-register allocator.
    pub register_allocation_enabled: bool,

    /// Elide a jump whose target is the immediately following block.
    pub fallthrough_jumps_enabled: bool,

    /// Defer stores of allocated registers until spill is required.
    pub lazy_spill_enabled: bool,
}
145
/// Streaming PVM instruction emitter for one LLVM function.
///
/// Owns the instruction buffer, label/fixup bookkeeping, the stack-slot map
/// for SSA values, and several register caches that let it elide redundant
/// loads, stores, and immediate moves.
pub struct PvmEmitter<'ctx> {
    pub(crate) config: EmitterConfig,

    /// Emitted instructions, in order.
    pub(crate) instructions: Vec<Instruction>,
    /// label id -> resolved byte offset (None until defined).
    pub(crate) labels: Vec<Option<usize>>,
    /// `(instruction index, label id)` pairs patched by `resolve_fixups`.
    pub(crate) fixups: Vec<(usize, usize)>,

    /// LLVM basic block -> label id.
    pub(crate) block_labels: HashMap<BasicBlock<'ctx>, usize>,

    /// SSA value -> stack-slot offset within the frame.
    pub(crate) value_slots: HashMap<ValKey, i32>,

    /// Next free slot offset (starts past the frame header).
    pub(crate) next_slot_offset: i32,

    /// Final frame size in bytes.
    pub(crate) frame_size: i32,

    pub(crate) call_fixups: Vec<LlvmCallFixup>,
    pub(crate) indirect_call_fixups: Vec<LlvmIndirectCallFixup>,

    /// Running encoded size of `instructions`, i.e. the current offset.
    pub(crate) byte_offset: usize,

    // Register cache: slot offset <-> register holding that slot's value.
    slot_cache: HashMap<i32, u8>,
    reg_to_slot: [Option<i32>; 13],

    /// An `icmp` awaiting fusion into the next conditional branch.
    pub(crate) pending_fused_icmp: Option<FusedIcmp<'ctx>>,

    /// Known constant per register (for constant propagation).
    reg_to_const: [Option<u64>; 13],

    /// Next free call-return jump-table index.
    pub(crate) next_call_return_idx: usize,

    /// First index this function may use; `num_call_returns` is the delta.
    call_return_base_idx: usize,

    /// Blocks with exactly one predecessor, mapped to that predecessor
    /// (enables cross-block cache propagation).
    pub(crate) block_single_pred: HashMap<BasicBlock<'ctx>, BasicBlock<'ctx>>,

    /// Which of the 4 callee-saved registers this function touches.
    pub(crate) used_callee_regs: [bool; 4],

    /// Frame offset where each used callee-saved register is saved.
    pub(crate) callee_save_offsets: [Option<i32>; 4],

    /// Whether the function makes any real calls (set by `pre_scan_function`).
    pub(crate) has_calls: bool,

    pub(crate) regalloc: RegAllocResult,

    /// Slot currently materialized in each allocator-owned register.
    alloc_reg_slot: [Option<i32>; 13],

    /// Register holds a value newer than its backing slot (lazy spill).
    alloc_dirty: [bool; 13],

    pub(crate) regalloc_usage: RegAllocUsageStats,

    /// Label of the block emitted next, if known (fallthrough elision).
    pub next_block_label: Option<usize>,
}
232
/// Point-in-time copy of the emitter's register caches, saved and restored
/// around control-flow joins (see `snapshot_cache` / `restore_cache`).
#[derive(Clone)]
pub struct CacheSnapshot {
    /// slot offset -> register currently holding that slot's value.
    pub slot_cache: HashMap<i32, u8>,
    /// Inverse of `slot_cache`, indexed by register.
    pub reg_to_slot: [Option<i32>; 13],
    /// Known constant per register.
    pub reg_to_const: [Option<u64>; 13],
    /// Slot materialized in each allocator-owned register.
    pub alloc_reg_slot: [Option<i32>; 13],
    /// Dirty flags for lazily-spilled allocated registers.
    pub alloc_dirty: [bool; 13],
}
242
/// Counters describing how effective register allocation was at runtime of
/// the lowering (not of the generated code).
#[derive(Debug, Clone, Default)]
pub struct RegAllocUsageStats {
    /// Loads that found the value already register-resident.
    pub load_hits: usize,
    /// Loads that had to reload the value from its stack slot.
    pub load_reloads: usize,
    /// Loads satisfied by a register-to-register move.
    pub load_moves: usize,
    /// Stores that targeted an allocated register.
    pub store_hits: usize,
    /// Stores that needed a register-to-register move.
    pub store_moves: usize,
}
257
258impl CacheSnapshot {
259 pub fn invalidate_reg(&mut self, reg: u8) {
262 let idx = reg as usize;
263 if let Some(slot) = self.reg_to_slot[idx].take() {
264 self.slot_cache.remove(&slot);
265 }
266 self.reg_to_const[idx] = None;
267 self.invalidate_alloc_reg(reg);
268 }
269
270 pub fn invalidate_alloc_reg(&mut self, reg: u8) {
272 let idx = reg as usize;
273 self.alloc_reg_slot[idx] = None;
274 self.alloc_dirty[idx] = false;
275 }
276}
277
/// A deferred integer comparison, held until the following conditional
/// branch so the two can be fused into one branch instruction.
pub struct FusedIcmp<'ctx> {
    pub predicate: IntPredicate,
    pub lhs: BasicValueEnum<'ctx>,
    pub rhs: BasicValueEnum<'ctx>,
}
284
/// Identity key for an LLVM value: the raw `LLVMValueRef` pointer as an
/// integer. Stable for the lifetime of the LLVM context, cheap to hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ValKey(pub(crate) usize);
289
290pub fn val_key_int(val: IntValue<'_>) -> ValKey {
291 ValKey(val.as_value_ref() as usize)
292}
293
294pub fn val_key_basic(val: BasicValueEnum<'_>) -> ValKey {
295 ValKey(val.as_value_ref() as usize)
296}
297
298pub fn val_key_instr(val: InstructionValue<'_>) -> ValKey {
299 ValKey(val.as_value_ref() as usize)
300}
301
302impl<'ctx> PvmEmitter<'ctx> {
303 pub fn new(config: EmitterConfig, call_return_base: usize) -> Self {
304 Self {
305 config,
306 instructions: Vec::new(),
307 labels: Vec::new(),
308 fixups: Vec::new(),
309 block_labels: HashMap::new(),
310 value_slots: HashMap::new(),
311 next_slot_offset: FRAME_HEADER_SIZE,
312 frame_size: 0,
313 call_fixups: Vec::new(),
314 indirect_call_fixups: Vec::new(),
315 byte_offset: 0,
316 slot_cache: HashMap::new(),
317 reg_to_slot: [None; 13],
318 reg_to_const: [None; 13],
319 pending_fused_icmp: None,
320 block_single_pred: HashMap::new(),
321 next_call_return_idx: call_return_base,
322 call_return_base_idx: call_return_base,
323 used_callee_regs: [true; 4],
324 callee_save_offsets: [Some(8), Some(16), Some(24), Some(32)],
325 has_calls: true, regalloc: RegAllocResult::default(),
327 alloc_reg_slot: [None; 13],
328 alloc_dirty: [false; 13],
329 regalloc_usage: RegAllocUsageStats::default(),
330 next_block_label: None,
331 }
332 }
333
334 pub fn alloc_call_return_addr(&mut self) -> i32 {
337 let idx = self.next_call_return_idx;
338 self.next_call_return_idx += 1;
339 (idx + 1)
340 .checked_mul(2)
341 .and_then(|v| i32::try_from(v).ok())
342 .expect("alloc_call_return_addr: jump table index overflow (exceeds i32 range)")
343 }
344
345 pub fn num_call_returns(&self) -> usize {
347 self.next_call_return_idx - self.call_return_base_idx
348 }
349
350 pub fn alloc_label(&mut self) -> usize {
351 let id = self.labels.len();
352 self.labels.push(None);
353 id
354 }
355
356 pub fn define_label(&mut self, label: usize) {
357 if self
358 .instructions
359 .last()
360 .is_some_and(|last| !last.is_terminating())
361 {
362 self.emit(Instruction::Fallthrough);
363 }
364 self.labels[label] = Some(self.current_offset());
365 self.clear_reg_cache();
366 }
367
368 pub fn define_label_preserving_alloc(&mut self, label: usize) {
372 if self
373 .instructions
374 .last()
375 .is_some_and(|last| !last.is_terminating())
376 {
377 self.emit(Instruction::Fallthrough);
378 }
379 self.labels[label] = Some(self.current_offset());
380 self.clear_reg_cache_preserving_alloc();
381 }
382
383 pub fn current_offset(&self) -> usize {
384 self.byte_offset
385 }
386
387 pub fn emit(&mut self, instr: Instruction) {
388 if self.config.constant_propagation_enabled {
390 match &instr {
391 Instruction::LoadImm { reg, value } => {
392 let val64 = i64::from(*value) as u64;
394 if self.reg_to_const[*reg as usize] == Some(val64) {
395 return; }
397 }
398 Instruction::LoadImm64 { reg, value } => {
399 if self.reg_to_const[*reg as usize] == Some(*value) {
400 return; }
402 }
403 _ => {}
404 }
405 }
406
407 if let Some(reg) = instr.dest_reg() {
408 self.invalidate_reg(reg);
409 }
410
411 if self.config.constant_propagation_enabled {
413 match &instr {
414 Instruction::LoadImm { reg, value } => {
415 self.reg_to_const[*reg as usize] = Some(i64::from(*value) as u64);
416 }
417 Instruction::LoadImm64 { reg, value } => {
418 self.reg_to_const[*reg as usize] = Some(*value);
419 }
420 _ => {}
421 }
422 }
423
424 self.byte_offset += instr.encode().len();
425 self.instructions.push(instr);
426 }
427
428 pub fn emit_jump_to_label(&mut self, label: usize) {
429 if self.config.fallthrough_jumps_enabled && self.next_block_label == Some(label) {
432 return;
433 }
434 let fixup_idx = self.instructions.len();
435 self.fixups.push((fixup_idx, label));
436 self.emit(Instruction::Jump { offset: 0 });
437 }
438
439 pub fn emit_branch_ne_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
440 let fixup_idx = self.instructions.len();
441 self.fixups.push((fixup_idx, label));
442 self.emit(Instruction::BranchNeImm {
443 reg,
444 value,
445 offset: 0,
446 });
447 }
448
449 pub fn emit_branch_eq_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
450 let fixup_idx = self.instructions.len();
451 self.fixups.push((fixup_idx, label));
452 self.emit(Instruction::BranchEqImm {
453 reg,
454 value,
455 offset: 0,
456 });
457 }
458
459 pub fn emit_branch_lt_u_to_label(&mut self, reg1: u8, reg2: u8, label: usize) {
460 let fixup_idx = self.instructions.len();
461 self.fixups.push((fixup_idx, label));
462 self.emit(Instruction::BranchLtU {
463 reg1,
464 reg2,
465 offset: 0,
466 });
467 }
468
469 pub fn emit_branch_ge_u_to_label(&mut self, reg1: u8, reg2: u8, label: usize) {
470 let fixup_idx = self.instructions.len();
471 self.fixups.push((fixup_idx, label));
472 self.emit(Instruction::BranchGeU {
473 reg1,
474 reg2,
475 offset: 0,
476 });
477 }
478
479 pub fn emit_branch_eq_to_label(&mut self, reg1: u8, reg2: u8, label: usize) {
480 let fixup_idx = self.instructions.len();
481 self.fixups.push((fixup_idx, label));
482 self.emit(Instruction::BranchEq {
483 reg1,
484 reg2,
485 offset: 0,
486 });
487 }
488
489 pub fn emit_branch_ne_to_label(&mut self, reg1: u8, reg2: u8, label: usize) {
490 let fixup_idx = self.instructions.len();
491 self.fixups.push((fixup_idx, label));
492 self.emit(Instruction::BranchNe {
493 reg1,
494 reg2,
495 offset: 0,
496 });
497 }
498
499 pub fn emit_branch_lt_s_to_label(&mut self, reg1: u8, reg2: u8, label: usize) {
500 let fixup_idx = self.instructions.len();
501 self.fixups.push((fixup_idx, label));
502 self.emit(Instruction::BranchLtS {
503 reg1,
504 reg2,
505 offset: 0,
506 });
507 }
508
509 pub fn emit_branch_ge_s_to_label(&mut self, reg1: u8, reg2: u8, label: usize) {
510 let fixup_idx = self.instructions.len();
511 self.fixups.push((fixup_idx, label));
512 self.emit(Instruction::BranchGeS {
513 reg1,
514 reg2,
515 offset: 0,
516 });
517 }
518
519 pub fn emit_branch_lt_u_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
520 let fixup_idx = self.instructions.len();
521 self.fixups.push((fixup_idx, label));
522 self.emit(Instruction::BranchLtUImm {
523 reg,
524 value,
525 offset: 0,
526 });
527 }
528
529 pub fn emit_branch_le_u_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
530 let fixup_idx = self.instructions.len();
531 self.fixups.push((fixup_idx, label));
532 self.emit(Instruction::BranchLeUImm {
533 reg,
534 value,
535 offset: 0,
536 });
537 }
538
539 pub fn emit_branch_ge_u_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
540 let fixup_idx = self.instructions.len();
541 self.fixups.push((fixup_idx, label));
542 self.emit(Instruction::BranchGeUImm {
543 reg,
544 value,
545 offset: 0,
546 });
547 }
548
549 pub fn emit_branch_gt_u_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
550 let fixup_idx = self.instructions.len();
551 self.fixups.push((fixup_idx, label));
552 self.emit(Instruction::BranchGtUImm {
553 reg,
554 value,
555 offset: 0,
556 });
557 }
558
559 pub fn emit_branch_lt_s_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
560 let fixup_idx = self.instructions.len();
561 self.fixups.push((fixup_idx, label));
562 self.emit(Instruction::BranchLtSImm {
563 reg,
564 value,
565 offset: 0,
566 });
567 }
568
569 pub fn emit_branch_le_s_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
570 let fixup_idx = self.instructions.len();
571 self.fixups.push((fixup_idx, label));
572 self.emit(Instruction::BranchLeSImm {
573 reg,
574 value,
575 offset: 0,
576 });
577 }
578
579 pub fn emit_branch_ge_s_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
580 let fixup_idx = self.instructions.len();
581 self.fixups.push((fixup_idx, label));
582 self.emit(Instruction::BranchGeSImm {
583 reg,
584 value,
585 offset: 0,
586 });
587 }
588
589 pub fn emit_branch_gt_s_imm_to_label(&mut self, reg: u8, value: i32, label: usize) {
590 let fixup_idx = self.instructions.len();
591 self.fixups.push((fixup_idx, label));
592 self.emit(Instruction::BranchGtSImm {
593 reg,
594 value,
595 offset: 0,
596 });
597 }
598
599 pub fn alloc_slot_for_key(&mut self, key: ValKey) -> i32 {
602 let offset = self.next_slot_offset;
603 self.value_slots.insert(key, offset);
604 self.next_slot_offset += 8;
605 offset
606 }
607
608 pub fn get_slot(&self, key: ValKey) -> Option<i32> {
609 self.value_slots.get(&key).copied()
610 }
611
612 fn emit_const_to_reg(&mut self, reg: u8, value: u64) {
617 let signed = value as i64;
618 if let Ok(v32) = i32::try_from(signed) {
619 self.emit(Instruction::LoadImm { reg, value: v32 });
620 } else {
621 self.emit(Instruction::LoadImm64 { reg, value });
622 }
623 }
624
625 pub fn load_operand(&mut self, val: BasicValueEnum<'ctx>, temp_reg: u8) -> Result<()> {
629 match val {
630 BasicValueEnum::IntValue(iv) => {
631 let key = val_key_int(iv);
632 if let Some(signed_val) = iv.get_sign_extended_constant() {
633 self.emit_const_to_reg(temp_reg, signed_val as u64);
634 } else if let Some(const_val) = iv.get_zero_extended_constant() {
635 self.emit_const_to_reg(temp_reg, const_val);
637 } else if let Some(&alloc_reg) = self.regalloc.val_to_reg.get(&key) {
638 self.regalloc_usage.load_hits += 1;
639 let slot = self.get_slot(key).ok_or_else(|| {
640 Error::Internal(format!("no slot for allocated int value {key:?}"))
641 })?;
642 let reg_idx = alloc_reg as usize;
646 if self.alloc_reg_slot[reg_idx] == Some(slot) {
647 if alloc_reg != temp_reg {
649 self.regalloc_usage.load_moves += 1;
650 self.emit(Instruction::MoveReg {
651 dst: temp_reg,
652 src: alloc_reg,
653 });
654 }
655 } else if alloc_reg == temp_reg {
656 self.regalloc_usage.load_reloads += 1;
658 self.emit(Instruction::LoadIndU64 {
659 dst: alloc_reg,
660 base: STACK_PTR_REG,
661 offset: slot,
662 });
663 self.alloc_reg_slot[reg_idx] = Some(slot);
664 self.cache_slot(slot, alloc_reg);
665 } else {
666 self.regalloc_usage.load_reloads += 1;
670 self.emit(Instruction::LoadIndU64 {
671 dst: temp_reg,
672 base: STACK_PTR_REG,
673 offset: slot,
674 });
675 self.cache_slot(slot, temp_reg);
676 }
677 } else if let Some(slot) = self.get_slot(key) {
678 if let Some(&cached_reg) = self.slot_cache.get(&slot) {
680 if cached_reg != temp_reg {
681 self.emit(Instruction::MoveReg {
683 dst: temp_reg,
684 src: cached_reg,
685 });
686 }
687 } else {
689 self.emit(Instruction::LoadIndU64 {
690 dst: temp_reg,
691 base: STACK_PTR_REG,
692 offset: slot,
693 });
694 self.cache_slot(slot, temp_reg);
695 }
696 } else if iv.is_poison() || iv.is_undef() {
697 self.emit(Instruction::LoadImm {
699 reg: temp_reg,
700 value: 0,
701 });
702 } else {
703 return Err(Error::Internal(format!(
704 "no slot for int value {:?} (opcode: {:?})",
705 val_key_int(iv),
706 iv.as_instruction().map(InstructionValue::get_opcode),
707 )));
708 }
709 }
710 _ => {
711 return Err(Error::Internal(format!(
712 "cannot load non-integer value type {:?}",
713 val.get_type()
714 )));
715 }
716 }
717 Ok(())
718 }
719
720 pub fn store_to_slot(&mut self, slot_offset: i32, src_reg: u8) {
721 let alloc_reg = self.regalloc.slot_to_reg.get(&slot_offset).copied();
722 if let Some(alloc_reg) = alloc_reg {
723 let reg_idx = alloc_reg as usize;
727 if let Some(current_slot) = self.alloc_reg_slot[reg_idx]
728 && current_slot != slot_offset
729 && self.alloc_dirty[reg_idx]
730 {
731 self.spill_dirty_reg(alloc_reg);
732 }
733
734 self.regalloc_usage.store_hits += 1;
735 if src_reg != alloc_reg {
736 self.regalloc_usage.store_moves += 1;
737 self.emit(Instruction::MoveReg {
738 dst: alloc_reg,
739 src: src_reg,
740 });
741 }
742 self.alloc_reg_slot[reg_idx] = Some(slot_offset);
743
744 if self.config.lazy_spill_enabled {
745 self.alloc_dirty[reg_idx] = true;
747 self.cache_slot(slot_offset, alloc_reg);
748 return;
749 }
750 }
751 self.emit(Instruction::StoreIndU64 {
752 base: STACK_PTR_REG,
753 src: src_reg,
754 offset: slot_offset,
755 });
756 self.cache_slot(slot_offset, alloc_reg.unwrap_or(src_reg));
757 }
758
759 fn spill_dirty_reg(&mut self, reg: u8) {
766 let reg_idx = reg as usize;
767 if !self.alloc_dirty[reg_idx] {
768 return;
769 }
770 if let Some(slot) = self.alloc_reg_slot[reg_idx] {
771 let instr = Instruction::StoreIndU64 {
772 base: STACK_PTR_REG,
773 src: reg,
774 offset: slot,
775 };
776 self.byte_offset += instr.encode().len();
777 self.instructions.push(instr);
778 }
779 self.alloc_dirty[reg_idx] = false;
780 }
781
782 pub fn spill_all_dirty_regs(&mut self) {
784 let regs: Vec<u8> = self.regalloc.reg_to_slot.keys().copied().collect();
785 for reg in regs {
786 self.spill_dirty_reg(reg);
787 }
788 }
789
790 pub fn set_alloc_reg_for_slot(&mut self, reg: u8, slot: i32) {
796 let idx = reg as usize;
797 self.alloc_reg_slot[idx] = Some(slot);
798 self.alloc_dirty[idx] = true;
799 self.cache_slot(slot, reg);
800 }
801
802 pub fn is_alloc_reg_valid(&self, reg: u8, slot: i32) -> bool {
804 self.alloc_reg_slot[reg as usize] == Some(slot)
805 }
806
807 pub fn emit_raw_move(&mut self, dst: u8, src: u8) {
810 let instr = Instruction::MoveReg { dst, src };
811 self.byte_offset += instr.encode().len();
812 self.instructions.push(instr);
813 self.reg_to_const[dst as usize] = None;
816 }
817
818 pub fn spill_dirty_reg_pub(&mut self, reg: u8) {
821 self.spill_dirty_reg(reg);
822 }
823
824 pub fn spill_allocated_regs(&mut self) {
828 if self.config.lazy_spill_enabled {
829 self.spill_all_dirty_regs();
830 }
831 }
832
833 fn invalidate_allocated_regs_where(&mut self, mut pred: impl FnMut(u8) -> bool) {
834 let regs: Vec<u8> = self
835 .regalloc
836 .reg_to_slot
837 .keys()
838 .copied()
839 .filter(|&r| pred(r))
840 .collect();
841 for reg in regs {
842 self.invalidate_reg(reg);
843 }
844 }
845
846 pub fn reload_allocated_regs_after_scratch_clobber(&mut self) {
849 self.invalidate_allocated_regs_where(|r| {
850 r == crate::abi::SCRATCH1 || r == crate::abi::SCRATCH2
851 });
852 }
853
854 pub fn reload_allocated_regs_after_call(&mut self) {
862 self.reload_allocated_regs_after_call_with_arity(crate::abi::MAX_LOCAL_REGS);
863 }
864
865 pub fn reload_allocated_regs_after_call_with_arity(&mut self, num_args: usize) {
872 let clobbered_locals = num_args.min(crate::abi::MAX_LOCAL_REGS);
873 self.invalidate_allocated_regs_where(|r| {
874 r == crate::abi::SCRATCH1
875 || r == crate::abi::SCRATCH2
876 || r == crate::abi::RETURN_VALUE_REG
877 || r == crate::abi::ARGS_LEN_REG
878 || (r >= crate::abi::FIRST_LOCAL_REG
879 && r < crate::abi::FIRST_LOCAL_REG + clobbered_locals as u8)
880 });
881 }
882
883 pub fn cache_slot(&mut self, slot: i32, reg: u8) {
887 if !self.config.register_cache_enabled {
888 return;
889 }
890 if let Some(old_slot) = self.reg_to_slot[reg as usize].take() {
892 self.slot_cache.remove(&old_slot);
893 }
894 if let Some(old_reg) = self.slot_cache.insert(slot, reg) {
896 self.reg_to_slot[old_reg as usize] = None;
897 }
898 self.reg_to_slot[reg as usize] = Some(slot);
899 }
900
901 fn invalidate_reg(&mut self, reg: u8) {
905 let idx = reg as usize;
906 if let Some(slot) = self.reg_to_slot[idx].take() {
907 self.slot_cache.remove(&slot);
908 }
909 if self.regalloc.reg_to_slot.contains_key(®) {
910 if self.config.lazy_spill_enabled
912 && self.alloc_dirty[idx]
913 && let Some(slot) = self.alloc_reg_slot[idx]
914 {
915 let instr = Instruction::StoreIndU64 {
916 base: STACK_PTR_REG,
917 src: reg,
918 offset: slot,
919 };
920 self.byte_offset += instr.encode().len();
921 self.instructions.push(instr);
922 }
923 self.alloc_reg_slot[idx] = None;
924 self.alloc_dirty[idx] = false;
925 }
926 self.reg_to_const[idx] = None;
927 }
928
929 pub fn clear_reg_cache(&mut self) {
934 self.slot_cache.clear();
935 self.reg_to_slot = [None; 13];
936 self.reg_to_const = [None; 13];
937 self.clear_allocated_reg_state();
938 }
939
940 pub fn clear_reg_cache_preserving_alloc(&mut self) {
946 self.slot_cache.clear();
947 self.reg_to_slot = [None; 13];
948 self.reg_to_const = [None; 13];
949 }
951
952 fn clear_allocated_reg_state(&mut self) {
953 for ® in self.regalloc.reg_to_slot.keys() {
954 let idx = reg as usize;
955 self.alloc_reg_slot[idx] = None;
956 self.alloc_dirty[idx] = false;
957 }
958 }
959
960 pub fn set_alloc_reg_slot_from(&mut self, alloc_reg_slot: &[Option<i32>; 13]) {
963 for ® in self.regalloc.reg_to_slot.keys() {
964 self.alloc_reg_slot[reg as usize] = alloc_reg_slot[reg as usize];
965 }
966 }
967
968 pub fn set_alloc_reg_slot_filtered(
972 &mut self,
973 alloc_reg_slot: &[Option<i32>; 13],
974 filter: impl Fn(u8) -> bool,
975 ) {
976 for ® in self.regalloc.reg_to_slot.keys() {
977 if filter(reg) {
978 self.alloc_reg_slot[reg as usize] = alloc_reg_slot[reg as usize];
979 }
980 }
981 }
982
983 pub fn intersect_alloc_reg_slot(&mut self, other: &[Option<i32>; 13]) {
986 for ® in self.regalloc.reg_to_slot.keys() {
987 let idx = reg as usize;
988 if self.alloc_reg_slot[idx] != other[idx] {
989 self.alloc_reg_slot[idx] = None;
990 }
991 }
992 }
993
994 pub fn snapshot_cache(&self) -> CacheSnapshot {
996 CacheSnapshot {
997 slot_cache: self.slot_cache.clone(),
998 reg_to_slot: self.reg_to_slot,
999 reg_to_const: self.reg_to_const,
1000 alloc_reg_slot: self.alloc_reg_slot,
1001 alloc_dirty: self.alloc_dirty,
1002 }
1003 }
1004
1005 pub fn restore_cache(&mut self, snapshot: &CacheSnapshot) {
1007 self.slot_cache.clone_from(&snapshot.slot_cache);
1008 self.reg_to_slot = snapshot.reg_to_slot;
1009 self.reg_to_const = snapshot.reg_to_const;
1010 self.alloc_reg_slot = snapshot.alloc_reg_slot;
1011 self.alloc_dirty = snapshot.alloc_dirty;
1012 }
1013
1014 pub fn define_label_preserving_cache(&mut self, label: usize) {
1017 if self
1018 .instructions
1019 .last()
1020 .is_some_and(|last| !last.is_terminating())
1021 {
1022 self.emit(Instruction::Fallthrough);
1023 }
1024 self.labels[label] = Some(self.current_offset());
1025 }
1026
1027 pub fn resolve_fixups(&mut self) -> Result<()> {
1030 let mut offsets = Vec::with_capacity(self.instructions.len());
1032 let mut running = 0usize;
1033 for instr in &self.instructions {
1034 offsets.push(running);
1035 running += instr.encode().len();
1036 }
1037
1038 for &(instr_idx, label_id) in &self.fixups {
1039 let target_offset = self.labels[label_id]
1040 .ok_or_else(|| Error::Unsupported("unresolved label".to_string()))?;
1041
1042 let instr_start = offsets[instr_idx];
1044 let relative_offset = (target_offset as i32) - (instr_start as i32);
1045
1046 match &mut self.instructions[instr_idx] {
1047 Instruction::Jump { offset }
1048 | Instruction::LoadImmJump { offset, .. }
1049 | Instruction::BranchNeImm { offset, .. }
1050 | Instruction::BranchEqImm { offset, .. }
1051 | Instruction::BranchGeSImm { offset, .. }
1052 | Instruction::BranchLtUImm { offset, .. }
1053 | Instruction::BranchLeUImm { offset, .. }
1054 | Instruction::BranchGeUImm { offset, .. }
1055 | Instruction::BranchGtUImm { offset, .. }
1056 | Instruction::BranchLtSImm { offset, .. }
1057 | Instruction::BranchLeSImm { offset, .. }
1058 | Instruction::BranchGtSImm { offset, .. }
1059 | Instruction::BranchEq { offset, .. }
1060 | Instruction::BranchNe { offset, .. }
1061 | Instruction::BranchGeU { offset, .. }
1062 | Instruction::BranchLtU { offset, .. }
1063 | Instruction::BranchLtS { offset, .. }
1064 | Instruction::BranchGeS { offset, .. } => {
1065 *offset = relative_offset;
1066 }
1067 _ => {
1068 return Err(Error::Unsupported(
1069 "cannot fixup non-jump instruction".to_string(),
1070 ));
1071 }
1072 }
1073 }
1074 Ok(())
1075 }
1076}
1077
1078pub fn try_get_constant(val: BasicValueEnum<'_>) -> Option<i64> {
1081 if let BasicValueEnum::IntValue(iv) = val {
1082 iv.get_sign_extended_constant()
1083 } else {
1084 None
1085 }
1086}
1087
1088pub fn get_operand(instr: InstructionValue<'_>, i: u32) -> Result<BasicValueEnum<'_>> {
1090 instr
1091 .get_operand(i)
1092 .and_then(inkwell::values::Operand::value)
1093 .ok_or_else(|| Error::Internal(format!("missing operand {i} in {:?}", instr.get_opcode())))
1094}
1095
1096pub fn get_bb_operand(instr: InstructionValue<'_>, i: u32) -> Result<BasicBlock<'_>> {
1098 instr
1099 .get_operand(i)
1100 .and_then(inkwell::values::Operand::block)
1101 .ok_or_else(|| {
1102 Error::Internal(format!(
1103 "missing bb operand {i} in {:?}",
1104 instr.get_opcode()
1105 ))
1106 })
1107}
1108
1109pub fn result_slot(e: &PvmEmitter<'_>, instr: InstructionValue<'_>) -> Result<i32> {
1111 let key = val_key_instr(instr);
1112 e.get_slot(key)
1113 .ok_or_else(|| Error::Internal(format!("no slot for {:?} result", instr.get_opcode())))
1114}
1115
/// Register to write `instr`'s result into: its allocated register when it
/// has one, otherwise the `TEMP_RESULT` scratch register.
pub fn result_reg(e: &PvmEmitter<'_>, instr: InstructionValue<'_>) -> u8 {
    result_reg_or(e, instr, crate::abi::TEMP_RESULT)
}
1123
1124pub fn result_reg_or(e: &PvmEmitter<'_>, instr: InstructionValue<'_>, fallback: u8) -> u8 {
1128 let key = val_key_instr(instr);
1129 if let Some(&alloc_reg) = e.regalloc.val_to_reg.get(&key) {
1130 alloc_reg
1131 } else {
1132 fallback
1133 }
1134}
1135
1136pub fn operand_reg(e: &PvmEmitter<'_>, val: BasicValueEnum<'_>, fallback: u8) -> u8 {
1140 if let BasicValueEnum::IntValue(iv) = val {
1141 if iv.get_sign_extended_constant().is_some() || iv.get_zero_extended_constant().is_some() {
1143 return fallback;
1144 }
1145 if iv.is_poison() || iv.is_undef() {
1146 return fallback;
1147 }
1148 let key = val_key_int(iv);
1149 if let Some(&alloc_reg) = e.regalloc.val_to_reg.get(&key)
1150 && let Some(slot) = e.get_slot(key)
1151 && e.alloc_reg_slot[alloc_reg as usize] == Some(slot)
1152 {
1153 return alloc_reg;
1154 }
1155 }
1156 fallback
1157}
1158
1159pub fn operand_bit_width(instr: InstructionValue<'_>) -> u32 {
1169 if let inkwell::types::AnyTypeEnum::IntType(ty) = instr.get_type() {
1171 return ty.get_bit_width();
1172 }
1173 if let Some(op) = instr
1175 .get_operand(0)
1176 .and_then(inkwell::values::Operand::value)
1177 && let BasicValueEnum::IntValue(iv) = op
1178 {
1179 return iv.get_type().get_bit_width();
1180 }
1181 64 }
1183
1184pub fn source_bit_width(instr: InstructionValue<'_>) -> u32 {
1189 if let Some(op) = instr
1190 .get_operand(0)
1191 .and_then(inkwell::values::Operand::value)
1192 && let BasicValueEnum::IntValue(iv) = op
1193 {
1194 return iv.get_type().get_bit_width();
1195 }
1196 64 }
1198
1199pub fn has_phi_from<'ctx>(current_bb: BasicBlock<'ctx>, target_bb: BasicBlock<'ctx>) -> bool {
1201 for instr in target_bb.get_instructions() {
1202 use inkwell::values::InstructionOpcode;
1203 if instr.get_opcode() != InstructionOpcode::Phi {
1204 break;
1205 }
1206 let Ok(phi): std::result::Result<PhiValue<'ctx>, _> = instr.try_into() else {
1207 break;
1208 };
1209 let num_incomings = phi.count_incoming();
1210 for i in 0..num_incomings {
1211 if let Some((_, block)) = phi.get_incoming(i)
1212 && block == current_bb
1213 {
1214 return true;
1215 }
1216 }
1217 }
1218 false
1219}
1220
1221pub fn block_has_phis(bb: BasicBlock<'_>) -> bool {
1223 bb.get_first_instruction()
1224 .is_some_and(|instr| instr.get_opcode() == inkwell::values::InstructionOpcode::Phi)
1225}
1226
1227pub fn pre_scan_function<'ctx>(
1231 emitter: &mut PvmEmitter<'ctx>,
1232 function: FunctionValue<'ctx>,
1233 is_main: bool,
1234) {
1235 let mut has_calls = false;
1241 for bb in function.get_basic_blocks() {
1242 for instr in bb.get_instructions() {
1243 if instr.get_opcode() == inkwell::values::InstructionOpcode::Call && is_real_call(instr)
1244 {
1245 has_calls = true;
1246 break;
1247 }
1248 }
1249 if has_calls {
1250 break;
1251 }
1252 }
1253 emitter.has_calls = has_calls;
1254
1255 if !is_main && emitter.config.shrink_wrap_enabled {
1257 let num_params = function.count_params() as usize;
1258 let mut used = [false; 4];
1259
1260 for u in used
1262 .iter_mut()
1263 .take(crate::abi::MAX_LOCAL_REGS.min(num_params))
1264 {
1265 *u = true;
1266 }
1267
1268 if has_calls {
1271 used = [true; 4];
1272 } else {
1273 }
1277
1278 emitter.used_callee_regs = used;
1279
1280 let mut offset = if has_calls { 8i32 } else { 0i32 }; let mut offsets = [None; 4];
1284 for i in 0..4 {
1285 if used[i] {
1286 offsets[i] = Some(offset);
1287 offset += 8;
1288 }
1289 }
1290 emitter.callee_save_offsets = offsets;
1291 emitter.next_slot_offset = offset;
1292 }
1293 if emitter.config.cross_block_cache_enabled && emitter.config.register_cache_enabled {
1297 let blocks = function.get_basic_blocks();
1298 let mut pred_count: HashMap<BasicBlock<'ctx>, usize> = HashMap::new();
1299 let mut pred_from: HashMap<BasicBlock<'ctx>, BasicBlock<'ctx>> = HashMap::new();
1300
1301 for bb in &blocks {
1302 if let Some(term) = bb.get_terminator() {
1303 let successors = super::successors::collect_successors(term);
1304 let unique_succs: HashSet<_> = successors.into_iter().collect();
1307 for succ in unique_succs {
1308 let count = pred_count.entry(succ).or_insert(0);
1309 *count += 1;
1310 pred_from.insert(succ, *bb);
1311 }
1312 }
1313 }
1314
1315 for (bb, count) in &pred_count {
1316 if *count == 1
1317 && let Some(pred) = pred_from.get(bb)
1318 {
1319 emitter.block_single_pred.insert(*bb, *pred);
1320 }
1321 }
1322 }
1323
1324 for param in function.get_params() {
1326 let key = val_key_basic(param);
1327 emitter.alloc_slot_for_key(key);
1328 }
1329
1330 for bb in function.get_basic_blocks() {
1332 let label = emitter.alloc_label();
1333 emitter.block_labels.insert(bb, label);
1334 }
1335
1336 for bb in function.get_basic_blocks() {
1338 for instr in bb.get_instructions() {
1339 if instruction_produces_value(instr) {
1340 let key = val_key_instr(instr);
1341 emitter.alloc_slot_for_key(key);
1342 }
1343 }
1344 }
1345}
1346
1347fn instruction_produces_value(instr: InstructionValue<'_>) -> bool {
1348 use inkwell::values::InstructionOpcode;
1349 matches!(
1350 instr.get_opcode(),
1351 InstructionOpcode::Add
1352 | InstructionOpcode::Sub
1353 | InstructionOpcode::Mul
1354 | InstructionOpcode::UDiv
1355 | InstructionOpcode::SDiv
1356 | InstructionOpcode::URem
1357 | InstructionOpcode::SRem
1358 | InstructionOpcode::And
1359 | InstructionOpcode::Or
1360 | InstructionOpcode::Xor
1361 | InstructionOpcode::Shl
1362 | InstructionOpcode::LShr
1363 | InstructionOpcode::AShr
1364 | InstructionOpcode::ICmp
1365 | InstructionOpcode::ZExt
1366 | InstructionOpcode::SExt
1367 | InstructionOpcode::Trunc
1368 | InstructionOpcode::Select
1369 | InstructionOpcode::Phi
1370 | InstructionOpcode::Load
1371 | InstructionOpcode::Call
1372 )
1373}
1374
1375pub(super) fn is_real_call(instr: InstructionValue<'_>) -> bool {
1384 let call_site: std::result::Result<inkwell::values::CallSiteValue, _> = instr.try_into();
1385 let Ok(call_site) = call_site else {
1386 return true; };
1388 let Some(fn_val) = call_site.get_called_fn_value() else {
1389 return true; };
1391 let name = fn_val.get_name().to_string_lossy();
1392 if name.starts_with("__pvm_") && name != "__pvm_call_indirect" {
1394 return false;
1395 }
1396 if name.starts_with("llvm.") {
1397 return false;
1398 }
1399 true
1400}
1401
1402pub fn scratch_regs_safe(function: FunctionValue<'_>) -> bool {
1412 use inkwell::values::InstructionOpcode;
1413
1414 for bb in function.get_basic_blocks() {
1415 for instr in bb.get_instructions() {
1416 if instr.get_opcode() == InstructionOpcode::Call {
1417 let call_site: std::result::Result<inkwell::values::CallSiteValue, _> =
1418 instr.try_into();
1419 let Ok(cs) = call_site else {
1420 continue;
1421 };
1422 let Some(fn_val) = cs.get_called_fn_value() else {
1423 continue;
1424 };
1425 let name = fn_val.get_name().to_string_lossy();
1426 if matches!(
1428 name.as_ref(),
1429 "__pvm_memory_grow"
1430 | "__pvm_memory_fill"
1431 | "__pvm_memory_copy"
1432 | "__pvm_memory_init"
1433 ) {
1434 return false;
1435 }
1436 if name.starts_with("llvm.fshl.") || name.starts_with("llvm.fshr.") {
1438 return false;
1439 }
1440 }
1441 }
1442 }
1443 true
1444}
1445
1446pub use crate::abi::{ARGS_LEN_REG as SCRATCH1, ARGS_PTR_REG as SCRATCH2};