1#![allow(
17 clippy::cast_possible_truncation,
18 clippy::cast_possible_wrap,
19 clippy::cast_sign_loss
20)]
21
22mod alu;
23mod calls;
24mod control_flow;
25mod emitter;
26mod intrinsics;
27mod memory;
28pub(crate) mod regalloc;
29mod successors;
30
31pub use emitter::{
32 EmitterConfig, LlvmCallFixup, LlvmFunctionTranslation, LlvmIndirectCallFixup, LoweringContext,
33};
34
35use std::collections::HashMap;
36
37use inkwell::basic_block::BasicBlock;
38use inkwell::values::{FunctionValue, InstructionOpcode};
39
40use crate::pvm::Instruction;
41use crate::{Error, Result, abi};
42
43use abi::{TEMP1, TEMP2};
44use emitter::{PvmEmitter, pre_scan_function, val_key_instr};
45
46pub fn lower_function(
48 function: FunctionValue<'_>,
49 ctx: &LoweringContext,
50 is_main: bool,
51 _func_idx: usize,
52 call_return_base: usize,
53) -> Result<LlvmFunctionTranslation> {
54 let config = EmitterConfig {
55 wasm_memory_base: ctx.wasm_memory_base,
56 param_overflow_base: ctx.param_overflow_base,
57 register_cache_enabled: ctx.optimizations.register_cache,
58 icmp_fusion_enabled: ctx.optimizations.icmp_branch_fusion,
59 shrink_wrap_enabled: ctx.optimizations.shrink_wrap_callee_saves,
60 constant_propagation_enabled: ctx.optimizations.constant_propagation,
61 cross_block_cache_enabled: ctx.optimizations.cross_block_cache,
62 register_allocation_enabled: ctx.optimizations.register_allocation,
63 fallthrough_jumps_enabled: ctx.optimizations.fallthrough_jumps,
64 lazy_spill_enabled: ctx.optimizations.lazy_spill,
65 };
66 let mut emitter = PvmEmitter::new(config, call_return_base);
67
68 pre_scan_function(&mut emitter, function, is_main);
70 emitter.frame_size = emitter.next_slot_offset;
71
72 if emitter.config.register_allocation_enabled {
74 let is_leaf = !emitter.has_calls;
75 let scratch_safe =
76 ctx.optimizations.allocate_scratch_regs && emitter::scratch_regs_safe(function);
77 emitter.regalloc = regalloc::run(
78 function,
79 &emitter.value_slots,
80 is_leaf,
81 function.count_params() as usize,
82 ctx.optimizations.aggressive_register_allocation,
83 scratch_safe,
84 ctx.optimizations.allocate_caller_saved_regs,
85 );
86
87 for ® in emitter.regalloc.reg_to_slot.keys() {
90 if reg >= crate::abi::FIRST_LOCAL_REG
91 && reg < crate::abi::FIRST_LOCAL_REG + crate::abi::MAX_LOCAL_REGS as u8
92 {
93 let idx = (reg - crate::abi::FIRST_LOCAL_REG) as usize;
94 if !emitter.used_callee_regs[idx] {
95 emitter.used_callee_regs[idx] = true;
96 emitter.callee_save_offsets[idx] = Some(emitter.next_slot_offset);
98 emitter.next_slot_offset += 8;
99 emitter.frame_size = emitter.next_slot_offset;
100 }
101 }
102 }
103 }
104
105 emit_prologue(&mut emitter, function, ctx, is_main)?;
107
108 if emitter.config.lazy_spill_enabled {
113 emitter.spill_all_dirty_regs();
114 }
115
116 let use_cross_block_cache =
118 emitter.config.register_cache_enabled && emitter.config.cross_block_cache_enabled;
119 let has_regalloc = !emitter.regalloc.val_to_reg.is_empty();
120 let is_leaf = !emitter.has_calls;
121 let mut block_exit_cache: HashMap<BasicBlock<'_>, emitter::CacheSnapshot> = HashMap::new();
122
123 let pred_map: HashMap<BasicBlock<'_>, Vec<BasicBlock<'_>>> = if has_regalloc
128 && (!is_leaf || emitter.config.lazy_spill_enabled)
129 {
130 let mut map: HashMap<BasicBlock<'_>, Vec<BasicBlock<'_>>> = HashMap::new();
131 for bb in function.get_basic_blocks() {
132 if let Some(term) = bb.get_terminator() {
133 let successors = successors::collect_successors(term);
134 let unique_succs: std::collections::HashSet<_> = successors.into_iter().collect();
135 for succ in unique_succs {
136 map.entry(succ).or_default().push(bb);
137 }
138 }
139 }
140 map
141 } else {
142 HashMap::new()
143 };
144
145 let basic_blocks = function.get_basic_blocks();
146 for (block_idx, bb) in basic_blocks.iter().enumerate() {
147 let bb = *bb;
148 let label = emitter.block_labels[&bb];
149 let pred_info = emitter.block_single_pred.get(&bb).copied();
150
151 emitter.next_block_label = basic_blocks
153 .get(block_idx + 1)
154 .and_then(|next_bb| emitter.block_labels.get(next_bb).copied());
155
156 let mut propagated = false;
157 if use_cross_block_cache
158 && let Some(pred_bb) = pred_info
159 && !emitter::block_has_phis(bb)
160 && let Some(snapshot) = block_exit_cache.get(&pred_bb).cloned()
161 {
162 emitter.define_label_preserving_cache(label);
163 emitter.restore_cache(&snapshot);
164 propagated = true;
165 }
166
167 if !propagated {
168 if has_regalloc && is_leaf && !emitter.config.lazy_spill_enabled {
169 emitter.define_label_preserving_alloc(label);
173 } else if has_regalloc && (!is_leaf || emitter.config.lazy_spill_enabled) {
174 emitter.define_label(label);
179 if let Some(preds) = pred_map.get(&bb) {
180 let all_processed = preds.iter().all(|p| block_exit_cache.contains_key(p));
181 if all_processed && !preds.is_empty() {
182 let first_snap = &block_exit_cache[&preds[0]];
184 emitter.set_alloc_reg_slot_from(&first_snap.alloc_reg_slot);
185 for pred in &preds[1..] {
186 let snap = &block_exit_cache[pred];
187 emitter.intersect_alloc_reg_slot(&snap.alloc_reg_slot);
188 }
189 } else if !preds.is_empty() {
190 let mut processed =
197 preds.iter().filter(|p| block_exit_cache.contains_key(p));
198 if let Some(first) = processed.next() {
199 let first_snap = &block_exit_cache[first];
200 if is_leaf {
201 emitter.set_alloc_reg_slot_from(&first_snap.alloc_reg_slot);
202 } else {
203 let clobbered_locals = emitter
204 .regalloc
205 .stats
206 .max_call_args
207 .min(crate::abi::MAX_LOCAL_REGS);
208 let first_safe =
209 crate::abi::FIRST_LOCAL_REG + clobbered_locals as u8;
210 let last_safe =
211 crate::abi::FIRST_LOCAL_REG + crate::abi::MAX_LOCAL_REGS as u8;
212 emitter
213 .set_alloc_reg_slot_filtered(&first_snap.alloc_reg_slot, |r| {
214 r >= first_safe && r < last_safe
215 });
216 }
217 for pred in processed {
218 let snap = &block_exit_cache[pred];
219 emitter.intersect_alloc_reg_slot(&snap.alloc_reg_slot);
220 }
221 }
222 }
223 }
224 } else {
225 emitter.define_label(label);
226 }
227
228 if has_regalloc && emitter.config.lazy_spill_enabled && emitter::block_has_phis(bb) {
235 restore_phi_alloc_reg_slots(&mut emitter, bb);
236 }
237 }
238
239 let instructions: Vec<_> = bb.get_instructions().collect();
245 if use_cross_block_cache && !instructions.is_empty() {
246 let term_idx = instructions.len() - 1;
247 for &instruction in &instructions[..term_idx] {
248 lower_instruction(&mut emitter, instruction, bb, ctx, is_main)?;
249 }
250 if emitter.config.lazy_spill_enabled {
260 emitter.spill_all_dirty_regs();
261 }
262 let mut snap = emitter.snapshot_cache();
267 snap.invalidate_reg(TEMP1);
268 snap.invalidate_reg(TEMP2);
269 block_exit_cache.insert(bb, snap);
270 lower_instruction(&mut emitter, instructions[term_idx], bb, ctx, is_main)?;
272 } else if !instructions.is_empty() && emitter.config.lazy_spill_enabled {
273 let term_idx = instructions.len() - 1;
274 for &instruction in &instructions[..term_idx] {
275 lower_instruction(&mut emitter, instruction, bb, ctx, is_main)?;
276 }
277 emitter.spill_all_dirty_regs();
279 lower_instruction(&mut emitter, instructions[term_idx], bb, ctx, is_main)?;
280 } else {
281 for &instruction in &instructions {
282 lower_instruction(&mut emitter, instruction, bb, ctx, is_main)?;
283 }
284 }
285 }
286 emitter.next_block_label = None;
287
288 let pre_dse_instructions = emitter.instructions.len();
290
291 if ctx.optimizations.dead_store_elimination {
298 let protected_offsets: std::collections::HashSet<i32> = std::collections::HashSet::new();
299 crate::pvm::peephole::eliminate_dead_stores(
300 &mut emitter.instructions,
301 &mut emitter.fixups,
302 &mut emitter.call_fixups,
303 &mut emitter.indirect_call_fixups,
304 &mut emitter.labels,
305 &protected_offsets,
306 );
307 }
308
309 let pre_peephole_instructions = emitter.instructions.len();
311
312 if ctx.optimizations.peephole {
314 crate::pvm::peephole::optimize(
315 &mut emitter.instructions,
316 &mut emitter.fixups,
317 &mut emitter.call_fixups,
318 &mut emitter.indirect_call_fixups,
319 &mut emitter.labels,
320 );
321 }
322
323 emitter.resolve_fixups()?;
324
325 if emitter.config.register_allocation_enabled {
326 let fn_name = function.get_name().to_string_lossy().to_string();
327 let stats = &emitter.regalloc.stats;
328 let usage = &emitter.regalloc_usage;
329 tracing::info!(
330 target: "wasm_pvm::regalloc",
331 function = %fn_name,
332 is_leaf = !emitter.has_calls,
333 params = function.count_params(),
334 total_values = stats.total_values,
335 total_intervals = stats.total_intervals,
336 has_loops = stats.has_loops,
337 allocatable_regs = stats.allocatable_regs,
338 allocated_values = stats.allocated_values,
339 skipped_reason = ?stats.skipped_reason,
340 alloc_load_hits = usage.load_hits,
341 alloc_load_reloads = usage.load_reloads,
342 alloc_load_moves = usage.load_moves,
343 alloc_store_hits = usage.store_hits,
344 alloc_store_moves = usage.store_moves,
345 emitted_instructions = emitter.instructions.len(),
346 "regalloc lowering summary"
347 );
348 }
349
350 let regalloc_stats = &emitter.regalloc.stats;
352 let regalloc_usage = &emitter.regalloc_usage;
353 let lowering_stats = emitter::FunctionLoweringStats {
354 frame_size: emitter.frame_size,
355 is_leaf: !emitter.has_calls,
356 pre_dse_instructions,
357 pre_peephole_instructions,
358 regalloc_total_values: regalloc_stats.total_values,
359 regalloc_allocated_values: regalloc_stats.allocated_values,
360 regalloc_registers_used: emitter.regalloc.reg_to_slot.keys().copied().collect(),
361 regalloc_skipped_reason: regalloc_stats.skipped_reason,
362 regalloc_load_hits: regalloc_usage.load_hits,
363 regalloc_load_reloads: regalloc_usage.load_reloads,
364 regalloc_load_moves: regalloc_usage.load_moves,
365 regalloc_store_hits: regalloc_usage.store_hits,
366 regalloc_store_moves: regalloc_usage.store_moves,
367 };
368
369 let num_call_returns = emitter.num_call_returns();
370 Ok(LlvmFunctionTranslation {
371 instructions: emitter.instructions,
372 call_fixups: emitter.call_fixups,
373 indirect_call_fixups: emitter.indirect_call_fixups,
374 num_call_returns,
375 lowering_stats,
376 })
377}
378
379fn restore_phi_alloc_reg_slots(e: &mut PvmEmitter<'_>, bb: BasicBlock<'_>) {
386 for instr in bb.get_instructions() {
387 if instr.get_opcode() != InstructionOpcode::Phi {
388 break;
389 }
390 let phi_key = val_key_instr(instr);
391 if let Some(&phi_reg) = e.regalloc.val_to_reg.get(&phi_key)
392 && let Some(phi_slot) = e.get_slot(phi_key)
393 {
394 e.set_alloc_reg_for_slot(phi_reg, phi_slot);
395 }
396 }
397}
398
399fn emit_prologue<'ctx>(
401 e: &mut PvmEmitter<'ctx>,
402 function: FunctionValue<'ctx>,
403 ctx: &LoweringContext,
404 is_main: bool,
405) -> Result<()> {
406 if !is_main {
407 let limit = abi::stack_limit(ctx.stack_size);
409 let continue_label = e.alloc_label();
410
411 e.emit(Instruction::LoadImm64 {
415 reg: TEMP1,
416 value: u64::from(limit as u32),
417 });
418 e.emit(Instruction::AddImm64 {
419 dst: TEMP2,
420 src: abi::STACK_PTR_REG,
421 value: -e.frame_size,
422 });
423 let fixup_idx = e.instructions.len();
426 e.fixups.push((fixup_idx, continue_label));
427 e.emit(Instruction::BranchGeU {
428 reg1: TEMP1,
429 reg2: TEMP2,
430 offset: 0,
431 });
432 e.emit(Instruction::Trap);
433 e.define_label(continue_label);
434 }
435
436 e.emit(Instruction::AddImm64 {
438 dst: abi::STACK_PTR_REG,
439 src: abi::STACK_PTR_REG,
440 value: -e.frame_size,
441 });
442
443 if !is_main {
444 if e.has_calls {
446 e.emit(Instruction::StoreIndU64 {
447 base: abi::STACK_PTR_REG,
448 src: abi::RETURN_ADDR_REG,
449 offset: 0,
450 });
451 }
452
453 for i in 0..abi::MAX_LOCAL_REGS {
455 if let Some(offset) = e.callee_save_offsets[i] {
456 e.emit(Instruction::StoreIndU64 {
457 base: abi::STACK_PTR_REG,
458 src: abi::FIRST_LOCAL_REG + i as u8,
459 offset,
460 });
461 }
462 }
463 }
464
465 let params = function.get_params();
467 for (i, param) in params.iter().enumerate() {
468 let key = emitter::val_key_basic(*param);
469 let slot = e
470 .get_slot(key)
471 .ok_or_else(|| Error::Internal(format!("no slot for parameter {i} (key {key:?})")))?;
472
473 if is_main {
474 if i == 0 {
477 e.emit(Instruction::AddImm64 {
478 dst: abi::ARGS_PTR_REG,
479 src: abi::ARGS_PTR_REG,
480 value: -e.config.wasm_memory_base,
481 });
482 e.store_to_slot(slot, abi::ARGS_PTR_REG);
483 } else if i == 1 {
484 e.store_to_slot(slot, abi::ARGS_LEN_REG);
485 }
486 } else if i < abi::MAX_LOCAL_REGS {
487 e.store_to_slot(slot, abi::FIRST_LOCAL_REG + i as u8);
489 } else {
490 let overflow_offset =
492 e.config.param_overflow_base + ((i - abi::MAX_LOCAL_REGS) * 8) as i32;
493 e.emit(Instruction::LoadImm {
494 reg: TEMP1,
495 value: overflow_offset,
496 });
497 e.emit(Instruction::LoadIndU64 {
498 dst: TEMP1,
499 base: TEMP1,
500 offset: 0,
501 });
502 e.store_to_slot(slot, TEMP1);
503 }
504 }
505
506 Ok(())
507}
508
509fn lower_instruction<'ctx>(
511 e: &mut PvmEmitter<'ctx>,
512 instr: inkwell::values::InstructionValue<'ctx>,
513 current_bb: inkwell::basic_block::BasicBlock<'ctx>,
514 ctx: &LoweringContext,
515 is_main: bool,
516) -> Result<()> {
517 use alu::{
518 BinaryOp, lower_binary_arith, lower_icmp, lower_select, lower_sext, lower_trunc, lower_zext,
519 };
520 use calls::lower_call;
521 use control_flow::{lower_br, lower_return, lower_switch};
522 use memory::{lower_wasm_global_load, lower_wasm_global_store};
523
524 match instr.get_opcode() {
525 InstructionOpcode::Add => lower_binary_arith(e, instr, BinaryOp::Add),
527 InstructionOpcode::Sub => lower_binary_arith(e, instr, BinaryOp::Sub),
528 InstructionOpcode::Mul => lower_binary_arith(e, instr, BinaryOp::Mul),
529 InstructionOpcode::UDiv => lower_binary_arith(e, instr, BinaryOp::UDiv),
530 InstructionOpcode::SDiv => lower_binary_arith(e, instr, BinaryOp::SDiv),
531 InstructionOpcode::URem => lower_binary_arith(e, instr, BinaryOp::URem),
532 InstructionOpcode::SRem => lower_binary_arith(e, instr, BinaryOp::SRem),
533
534 InstructionOpcode::And => lower_binary_arith(e, instr, BinaryOp::And),
536 InstructionOpcode::Or => lower_binary_arith(e, instr, BinaryOp::Or),
537 InstructionOpcode::Xor => lower_binary_arith(e, instr, BinaryOp::Xor),
538 InstructionOpcode::Shl => lower_binary_arith(e, instr, BinaryOp::Shl),
539 InstructionOpcode::LShr => lower_binary_arith(e, instr, BinaryOp::LShr),
540 InstructionOpcode::AShr => lower_binary_arith(e, instr, BinaryOp::AShr),
541
542 InstructionOpcode::ICmp => lower_icmp(e, instr),
544
545 InstructionOpcode::ZExt => lower_zext(e, instr),
547 InstructionOpcode::SExt => lower_sext(e, instr),
548 InstructionOpcode::Trunc => lower_trunc(e, instr),
549
550 InstructionOpcode::Select => lower_select(e, instr),
552
553 InstructionOpcode::Br => lower_br(e, instr, current_bb),
555 InstructionOpcode::Switch => lower_switch(e, instr, current_bb),
556 InstructionOpcode::Return => lower_return(e, instr, is_main),
557 InstructionOpcode::Unreachable => {
558 e.emit(Instruction::Trap);
559 Ok(())
560 }
561
562 InstructionOpcode::Load => lower_wasm_global_load(e, instr, ctx),
564 InstructionOpcode::Store => lower_wasm_global_store(e, instr, ctx),
565
566 InstructionOpcode::Call => lower_call(e, instr, ctx),
568
569 InstructionOpcode::Phi => Ok(()),
571
572 _ => Err(Error::Unsupported(format!(
573 "LLVM opcode {:?}",
574 instr.get_opcode()
575 ))),
576 }
577}