use crate::{
assembly::{
instruction::{FlowType, Immediate, Instruction, Operand, OperandType},
instructions::{CilInstruction, INSTRUCTIONS, INSTRUCTIONS_FE},
},
Error, Result,
};
use std::{
collections::{HashMap, HashSet},
sync::OnceLock,
};
/// Process-wide, lazily initialised mnemonic lookup table.
///
/// Keys are instruction mnemonics; each value is `(opcode byte, prefix byte, metadata)`.
/// The table is filled on first use by `get_mnemonic_lookup`, which stores a prefix of `0`
/// for entries taken from `INSTRUCTIONS` and `0xFE` for entries taken from `INSTRUCTIONS_FE`.
static MNEMONIC_TO_OPCODE: OnceLock<
HashMap<&'static str, (u8, u8, &'static CilInstruction<'static>)>,
> = OnceLock::new();
/// Returns the shared mnemonic → `(opcode, prefix, metadata)` table, building it on first call.
///
/// Entries from `INSTRUCTIONS` are registered with prefix `0`, entries from `INSTRUCTIONS_FE`
/// with prefix `0xFE`. Table slots whose mnemonic is empty are skipped.
///
/// # Panics
///
/// Panics during initialisation if a non-empty entry sits at a table index above 255.
fn get_mnemonic_lookup(
) -> &'static HashMap<&'static str, (u8, u8, &'static CilInstruction<'static>)> {
    type Lookup = HashMap<&'static str, (u8, u8, &'static CilInstruction<'static>)>;

    /// Adds every named entry of one opcode table to `lookup` under the given prefix byte.
    fn register(
        lookup: &mut Lookup,
        prefix: u8,
        entries: impl Iterator<Item = &'static CilInstruction<'static>>,
    ) {
        for (opcode, entry) in entries.enumerate() {
            if entry.instr.is_empty() {
                continue;
            }
            // The table index doubles as the opcode byte.
            let Ok(opcode_byte) = u8::try_from(opcode) else {
                panic!("Opcode {opcode} exceeds u8 range");
            };
            lookup.insert(entry.instr, (opcode_byte, prefix, entry));
        }
    }

    MNEMONIC_TO_OPCODE.get_or_init(|| {
        let mut lookup = Lookup::new();
        // Order matters only if a mnemonic appears in both tables: the later insert wins.
        register(&mut lookup, 0, INSTRUCTIONS.iter());
        register(&mut lookup, 0xFE, INSTRUCTIONS_FE.iter());
        lookup
    })
}
/// A branch operand that was emitted as placeholder bytes and is patched once label
/// positions are known.
#[derive(Debug, Clone)]
pub struct LabelFixup {
/// Name of the label the branch targets.
pub label: String,
/// Index into the bytecode of the first placeholder byte of the branch offset.
pub fixup_position: usize,
/// Width of the offset operand in bytes (1, 2 or 4).
pub offset_size: u8,
/// Index into the bytecode of the first byte of the branch instruction itself.
pub instruction_position: usize,
/// Short-form mnemonic this branch may be rewritten to. Set only for branches with a
/// 4-byte offset that have a short form; cleared once the branch has been shrunk.
pub short_form_mnemonic: Option<&'static str>,
}
/// The jump table of an emitted `switch` instruction, awaiting label resolution.
#[derive(Debug, Clone)]
pub struct SwitchFixup {
/// Target label names, in jump-table order.
pub labels: Vec<String>,
/// Index into the bytecode of the first 4-byte jump-table slot.
pub fixup_position: usize,
/// Index into the bytecode just past the last jump-table slot; switch offsets are
/// computed relative to this position.
pub instruction_end_position: usize,
}
/// Incrementally encodes instructions into a byte stream while tracking labels, pending
/// branch fixups and evaluation-stack depth.
pub struct InstructionEncoder {
/// Encoded instruction bytes emitted so far.
bytecode: Vec<u8>,
/// Defined labels, mapped to their byte offset in `bytecode`.
labels: HashMap<String, u32>,
/// Branch operands that still need their offsets written.
fixups: Vec<LabelFixup>,
/// Switch jump tables that still need their offsets written.
switch_fixups: Vec<SwitchFixup>,
/// Stack depth after the most recently emitted instruction.
current_stack_depth: i16,
/// Largest stack depth observed so far.
max_stack_depth: u16,
/// Stack depth expected at each label, recorded from branches to it, from its definition,
/// or set explicitly through `set_label_stack_depth`.
label_stack_depths: HashMap<String, i16>,
/// True after an instruction that never falls through (return, throw, endfinally,
/// unconditional branch, leave) until the next label is defined.
unreachable: bool,
}
/// Maps a long-form branch mnemonic to its short (`.s`) counterpart.
///
/// Returns `None` for anything that is not one of the known long-form branch mnemonics,
/// including mnemonics that are already short forms.
fn get_short_form_mnemonic(mnemonic: &str) -> Option<&'static str> {
    /// `(long form, short form)` pairs.
    const SHORT_FORMS: [(&str, &str); 14] = [
        ("br", "br.s"),
        ("brfalse", "brfalse.s"),
        ("brtrue", "brtrue.s"),
        ("beq", "beq.s"),
        ("bne.un", "bne.un.s"),
        ("bge", "bge.s"),
        ("bge.un", "bge.un.s"),
        ("bgt", "bgt.s"),
        ("bgt.un", "bgt.un.s"),
        ("ble", "ble.s"),
        ("ble.un", "ble.un.s"),
        ("blt", "blt.s"),
        ("blt.un", "blt.un.s"),
        ("leave", "leave.s"),
    ];

    SHORT_FORMS
        .iter()
        .find(|(long_form, _)| *long_form == mnemonic)
        .map(|&(_, short_form)| short_form)
}
impl InstructionEncoder {
/// Creates an empty encoder: no bytecode, labels or fixups, a stack depth of zero, and
/// the current position marked as reachable.
#[must_use]
pub fn new() -> Self {
Self {
bytecode: Vec::new(),
labels: HashMap::new(),
fixups: Vec::new(),
switch_fixups: Vec::new(),
current_stack_depth: 0,
max_stack_depth: 0,
label_stack_depths: HashMap::new(),
unreachable: false,
}
}
pub fn emit_instruction(&mut self, mnemonic: &str, operand: Option<Operand>) -> Result<()> {
let (opcode, prefix, metadata) = get_mnemonic_lookup()
.get(mnemonic)
.ok_or_else(|| Error::InvalidMnemonic(mnemonic.to_string()))?;
if *prefix != 0 {
self.bytecode.push(*prefix);
}
self.bytecode.push(*opcode);
self.emit_operand(operand, metadata.op_type)?;
self.update_stack_depth(metadata.stack_pops, metadata.stack_pushes)
.map_err(|e| malformed_error!("Stack error at instruction '{}': {}", mnemonic, e))?;
if matches!(
metadata.flow,
FlowType::Return | FlowType::Throw | FlowType::EndFinally
) {
self.unreachable = true;
}
Ok(())
}
/// Re-emits an already decoded instruction through `emit_instruction`.
///
/// An `Operand::None` operand is passed on as "no operand"; any other operand is cloned.
///
/// # Errors
///
/// Propagates whatever `emit_instruction` returns.
pub fn emit_instruction_decoded(&mut self, instruction: &Instruction) -> Result<()> {
    let operand = match &instruction.operand {
        Operand::None => None,
        present => Some(present.clone()),
    };
    self.emit_instruction(instruction.mnemonic, operand)
}
/// Emits a branch to `label`, leaving placeholder offset bytes that `finalize` patches.
///
/// A `LabelFixup` is recorded for the operand. Branches with a 4-byte offset additionally
/// remember their short-form mnemonic (if one exists) so they can be shrunk later. The
/// stack depth after the branch is recorded as the depth expected at `label`, and the
/// encoder is marked unreachable after an unconditional branch or leave.
///
/// # Errors
///
/// - `Error::InvalidMnemonic` if `mnemonic` is unknown.
/// - `Error::InvalidBranch` if the instruction is not a branch, or its operand type is
///   not `Int8`, `Int16` or `Int32`.
/// - A malformed-data error on stack underflow or when the depth at this branch
///   disagrees with the depth already recorded for `label`.
///
/// All validation happens before any byte or fixup is written, and the stack counters are
/// restored on failure, so an error leaves the encoder unchanged.
pub fn emit_branch(&mut self, mnemonic: &str, label: &str) -> Result<()> {
    let (opcode, prefix, metadata) = get_mnemonic_lookup()
        .get(mnemonic)
        .ok_or_else(|| Error::InvalidMnemonic(mnemonic.to_string()))?;
    if !matches!(
        metadata.flow,
        FlowType::ConditionalBranch | FlowType::UnconditionalBranch | FlowType::Leave
    ) {
        return Err(Error::InvalidBranch(format!(
            "instruction '{mnemonic}' is not a branch instruction"
        )));
    }

    // Determine the operand width up front so an unsupported operand type cannot leave a
    // stray opcode byte behind.
    let offset_size: u8 = match metadata.op_type {
        OperandType::Int8 => 1,
        OperandType::Int16 => 2,
        OperandType::Int32 => 4,
        _ => {
            return Err(Error::InvalidBranch(
                "operand type must be Int8, Int16, or Int32".to_string(),
            ))
        }
    };

    // Run the fallible stack bookkeeping before emitting anything.
    let depth_before = self.current_stack_depth;
    let max_before = self.max_stack_depth;
    let stack_check = self
        .update_stack_depth(metadata.stack_pops, metadata.stack_pushes)
        .map_err(|e| {
            malformed_error!("Stack error at branch '{}' to '{}': {}", mnemonic, label, e)
        })
        .and_then(|()| self.record_label_stack_depth(label));
    if let Err(err) = stack_check {
        self.current_stack_depth = depth_before;
        self.max_stack_depth = max_before;
        return Err(err);
    }

    // From here on nothing can fail.
    let instruction_start = self.bytecode.len();
    if *prefix != 0 {
        self.bytecode.push(*prefix);
    }
    self.bytecode.push(*opcode);

    // Only long (4-byte) branches are candidates for later shrinking.
    let short_form_mnemonic = if offset_size == 4 {
        get_short_form_mnemonic(mnemonic)
    } else {
        None
    };
    self.fixups.push(LabelFixup {
        label: label.to_string(),
        fixup_position: self.bytecode.len(),
        offset_size,
        instruction_position: instruction_start,
        short_form_mnemonic,
    });

    // Placeholder bytes for the offset.
    let operand_end = self.bytecode.len() + usize::from(offset_size);
    self.bytecode.resize(operand_end, 0);

    if matches!(
        metadata.flow,
        FlowType::UnconditionalBranch | FlowType::Leave
    ) {
        self.unreachable = true;
    }
    Ok(())
}
/// Emits a `switch` instruction (opcode `0x45`) whose jump table targets `labels`.
///
/// Writes the opcode, the little-endian target count and one zeroed 4-byte slot per
/// label, records a `SwitchFixup` for later patching, pops one value from the tracked
/// stack, and records the resulting depth as the expected depth of every target label.
///
/// # Errors
///
/// - `Error::InvalidBranch` if `labels` is empty.
/// - A malformed-data error if the label count does not fit in `u32`, on stack
///   underflow, or when a target label already expects a different stack depth.
pub fn emit_switch(&mut self, labels: &[&str]) -> Result<()> {
    if labels.is_empty() {
        return Err(Error::InvalidBranch(
            "switch must have at least one target".to_string(),
        ));
    }

    self.bytecode.push(0x45);
    let target_count =
        u32::try_from(labels.len()).map_err(|_| malformed_error!("Too many switch targets"))?;
    self.bytecode.extend_from_slice(&target_count.to_le_bytes());

    // Reserve one zeroed 4-byte slot per target.
    let fixup_position = self.bytecode.len();
    let instruction_end_position = fixup_position + labels.len() * 4;
    self.bytecode.resize(instruction_end_position, 0);

    self.switch_fixups.push(SwitchFixup {
        labels: labels.iter().map(|&name| String::from(name)).collect(),
        fixup_position,
        instruction_end_position,
    });

    self.update_stack_depth(1, 0)
        .map_err(|e| malformed_error!("Stack error at 'switch': {}", e))?;

    labels
        .iter()
        .try_for_each(|label| self.record_label_stack_depth(label))
}
pub fn emit_call(
&mut self,
mnemonic: &str,
operand: Option<Operand>,
num_args: u8,
has_result: bool,
) -> Result<()> {
let (opcode, prefix, metadata) = get_mnemonic_lookup()
.get(mnemonic)
.ok_or_else(|| Error::InvalidMnemonic(mnemonic.to_string()))?;
if *prefix != 0 {
self.bytecode.push(*prefix);
}
self.bytecode.push(*opcode);
self.emit_operand(operand, metadata.op_type)?;
let pushes = u8::from(has_result);
self.update_stack_depth(num_args, pushes)
.map_err(|e| malformed_error!("Stack error at call '{}': {}", mnemonic, e))?;
Ok(())
}
/// Emits the most compact argument load for `index`.
///
/// Indices 0–3 use the operand-less `ldarg.N` forms, indices up to 255 use `ldarg.s`
/// with a one-byte operand, and anything larger uses `ldarg` with a 16-bit operand.
///
/// # Errors
///
/// Propagates whatever `emit_instruction` returns.
pub fn emit_ldarg(&mut self, index: u16) -> Result<()> {
    const COMPACT: [&str; 4] = ["ldarg.0", "ldarg.1", "ldarg.2", "ldarg.3"];

    if let Some(mnemonic) = COMPACT.get(usize::from(index)).copied() {
        return self.emit_instruction(mnemonic, None);
    }

    let (mnemonic, immediate) = match u8::try_from(index) {
        Ok(short) => ("ldarg.s", Immediate::UInt8(short)),
        Err(_) => {
            // Bit-preserving reinterpretation: the operand is carried as a signed 16-bit value.
            #[allow(clippy::cast_possible_wrap)]
            let wide = index as i16;
            ("ldarg", Immediate::Int16(wide))
        }
    };
    self.emit_instruction(mnemonic, Some(Operand::Immediate(immediate)))
}
/// Emits the most compact local-variable load for `index`.
///
/// Indices 0–3 use the operand-less `ldloc.N` forms, indices up to 255 use `ldloc.s`
/// with a one-byte operand, and anything larger uses `ldloc` with a 16-bit operand.
///
/// # Errors
///
/// Propagates whatever `emit_instruction` returns.
pub fn emit_ldloc(&mut self, index: u16) -> Result<()> {
    const COMPACT: [&str; 4] = ["ldloc.0", "ldloc.1", "ldloc.2", "ldloc.3"];

    if let Some(mnemonic) = COMPACT.get(usize::from(index)).copied() {
        return self.emit_instruction(mnemonic, None);
    }

    let (mnemonic, immediate) = match u8::try_from(index) {
        Ok(short) => ("ldloc.s", Immediate::UInt8(short)),
        Err(_) => {
            // Bit-preserving reinterpretation: the operand is carried as a signed 16-bit value.
            #[allow(clippy::cast_possible_wrap)]
            let wide = index as i16;
            ("ldloc", Immediate::Int16(wide))
        }
    };
    self.emit_instruction(mnemonic, Some(Operand::Immediate(immediate)))
}
/// Emits an argument store for `index`, using `starg.s` when the index fits in one byte
/// and `starg` with a 16-bit operand otherwise.
///
/// # Errors
///
/// Propagates whatever `emit_instruction` returns.
pub fn emit_starg(&mut self, index: u16) -> Result<()> {
    let (mnemonic, immediate) = match u8::try_from(index) {
        Ok(short) => ("starg.s", Immediate::UInt8(short)),
        Err(_) => {
            // Bit-preserving reinterpretation: the operand is carried as a signed 16-bit value.
            #[allow(clippy::cast_possible_wrap)]
            let wide = index as i16;
            ("starg", Immediate::Int16(wide))
        }
    };
    self.emit_instruction(mnemonic, Some(Operand::Immediate(immediate)))
}
/// Emits the most compact local-variable store for `index`.
///
/// Indices 0–3 use the operand-less `stloc.N` forms, indices up to 255 use `stloc.s`
/// with a one-byte operand, and anything larger uses `stloc` with a 16-bit operand.
///
/// # Errors
///
/// Propagates whatever `emit_instruction` returns.
pub fn emit_stloc(&mut self, index: u16) -> Result<()> {
    const COMPACT: [&str; 4] = ["stloc.0", "stloc.1", "stloc.2", "stloc.3"];

    if let Some(mnemonic) = COMPACT.get(usize::from(index)).copied() {
        return self.emit_instruction(mnemonic, None);
    }

    let (mnemonic, immediate) = match u8::try_from(index) {
        Ok(short) => ("stloc.s", Immediate::UInt8(short)),
        Err(_) => {
            // Bit-preserving reinterpretation: the operand is carried as a signed 16-bit value.
            #[allow(clippy::cast_possible_wrap)]
            let wide = index as i16;
            ("stloc", Immediate::Int16(wide))
        }
    };
    self.emit_instruction(mnemonic, Some(Operand::Immediate(immediate)))
}
/// Emits the most compact 32-bit integer constant load for `value`.
///
/// Values -1 through 8 use the dedicated operand-less forms, other values that fit in a
/// signed byte use `ldc.i4.s`, and everything else uses `ldc.i4` with a 4-byte operand.
///
/// # Errors
///
/// Propagates whatever `emit_instruction` returns.
pub fn emit_ldc_i4(&mut self, value: i32) -> Result<()> {
    // Slot `i` holds the mnemonic for the constant `i - 1`.
    const DEDICATED: [&str; 10] = [
        "ldc.i4.m1",
        "ldc.i4.0",
        "ldc.i4.1",
        "ldc.i4.2",
        "ldc.i4.3",
        "ldc.i4.4",
        "ldc.i4.5",
        "ldc.i4.6",
        "ldc.i4.7",
        "ldc.i4.8",
    ];

    let dedicated = value
        .checked_add(1)
        .and_then(|slot| usize::try_from(slot).ok())
        .and_then(|slot| DEDICATED.get(slot).copied());
    if let Some(mnemonic) = dedicated {
        return self.emit_instruction(mnemonic, None);
    }

    match i8::try_from(value) {
        Ok(small) => self.emit_instruction(
            "ldc.i4.s",
            Some(Operand::Immediate(Immediate::Int8(small))),
        ),
        Err(_) => self.emit_instruction(
            "ldc.i4",
            Some(Operand::Immediate(Immediate::Int32(value))),
        ),
    }
}
/// Emits an argument-address load for `index`, using `ldarga.s` when the index fits in
/// one byte and `ldarga` with a 16-bit operand otherwise.
///
/// # Errors
///
/// Propagates whatever `emit_instruction` returns.
pub fn emit_ldarga(&mut self, index: u16) -> Result<()> {
    let (mnemonic, immediate) = match u8::try_from(index) {
        Ok(short) => ("ldarga.s", Immediate::UInt8(short)),
        Err(_) => {
            // Bit-preserving reinterpretation: the operand is carried as a signed 16-bit value.
            #[allow(clippy::cast_possible_wrap)]
            let wide = index as i16;
            ("ldarga", Immediate::Int16(wide))
        }
    };
    self.emit_instruction(mnemonic, Some(Operand::Immediate(immediate)))
}
/// Emits a local-variable-address load for `index`, using `ldloca.s` when the index fits
/// in one byte and `ldloca` with a 16-bit operand otherwise.
///
/// # Errors
///
/// Propagates whatever `emit_instruction` returns.
pub fn emit_ldloca(&mut self, index: u16) -> Result<()> {
    let (mnemonic, immediate) = match u8::try_from(index) {
        Ok(short) => ("ldloca.s", Immediate::UInt8(short)),
        Err(_) => {
            // Bit-preserving reinterpretation: the operand is carried as a signed 16-bit value.
            #[allow(clippy::cast_possible_wrap)]
            let wide = index as i16;
            ("ldloca", Immediate::Int16(wide))
        }
    };
    self.emit_instruction(mnemonic, Some(Operand::Immediate(immediate)))
}
/// Defines `name` at the current end of the bytecode and reconciles the stack depth.
///
/// - If a depth is already expected at this label and the label is reached only by
///   branches (the preceding code does not fall through), that depth becomes the current
///   depth and is folded into the maximum stack depth. The expected depth may have been
///   set through `set_label_stack_depth` and so may never have been seen by the normal
///   stack tracking.
/// - If a depth is expected and control falls through, the current depth must match.
/// - If no depth is known yet, the current depth is recorded when falling through, or
///   the depth is reset to 0 when the label follows unreachable code.
///
/// Defining a label always makes the following code reachable again.
///
/// # Errors
///
/// - `Error::DuplicateLabel` if `name` is already defined.
/// - A malformed-data error if the bytecode length does not fit in `u32`, or if the
///   fall-through depth disagrees with the depth recorded for the label.
///
/// No encoder state is modified when an error is returned.
pub fn define_label(&mut self, name: &str) -> Result<()> {
    if self.labels.contains_key(name) {
        return Err(Error::DuplicateLabel(name.to_string()));
    }
    // Checked before any state change so a failure leaves the encoder untouched.
    let bytecode_len = u32::try_from(self.bytecode.len())
        .map_err(|_| malformed_error!("Bytecode length exceeds u32 range"))?;

    match (self.label_stack_depths.get(name).copied(), self.unreachable) {
        (Some(expected), true) => {
            self.current_stack_depth = expected;
            // The stack really holds `expected` values here, so the maximum must cover it.
            if let Ok(depth) = u16::try_from(expected) {
                self.max_stack_depth = self.max_stack_depth.max(depth);
            }
        }
        (Some(expected), false) => {
            if self.current_stack_depth != expected {
                return Err(malformed_error!(
                    "Stack depth mismatch at label '{}': expected {} (from branch), got {} (current)",
                    name,
                    expected,
                    self.current_stack_depth
                ));
            }
        }
        (None, false) => {
            self.label_stack_depths
                .insert(name.to_string(), self.current_stack_depth);
        }
        (None, true) => {
            self.current_stack_depth = 0;
        }
    }

    self.unreachable = false;
    self.labels.insert(name.to_string(), bytecode_len);
    Ok(())
}
/// Returns the current length of the emitted bytecode, i.e. the offset at which the next
/// instruction would start. The length is truncated to 32 bits.
#[must_use]
#[allow(clippy::cast_possible_truncation)]
pub fn current_position(&self) -> u32 {
    self.bytecode.len() as u32
}
/// Appends a `ret` when the code does not end in a terminator but a branch or switch
/// target sits at the very end of the bytecode.
///
/// Empty bytecode is left alone. Code is treated as terminated when its last byte is
/// `0x2A`, `0x7A`, `0xDC` or `0x27`, or when its last two bytes are `0xFE 0x1A`.
///
/// NOTE(review): the check inspects raw trailing bytes, so an operand byte that happens
/// to equal one of these values is also treated as a terminator. Fixing that needs
/// instruction-boundary tracking outside this function.
///
/// # Errors
///
/// Propagates any error from emitting the trailing `ret`.
fn ensure_method_terminated(&mut self) -> Result<()> {
    let ends_with_terminator = match self.bytecode.as_slice() {
        [] => return Ok(()),
        [.., 0xFE, 0x1A] => true,
        [.., last] => matches!(*last, 0x2A | 0x7A | 0xDC | 0x27),
    };
    if ends_with_terminator {
        return Ok(());
    }

    let end = self.bytecode.len();
    let labels = &self.labels;
    let lands_at_end =
        |label: &str| labels.get(label).is_some_and(|&pos| pos as usize >= end);

    // Both ordinary branches and switch jump tables can target the end of the code.
    let has_branch_to_end = self
        .fixups
        .iter()
        .map(|fixup| fixup.label.as_str())
        .chain(
            self.switch_fixups
                .iter()
                .flat_map(|switch_fixup| switch_fixup.labels.iter().map(String::as_str)),
        )
        .any(lands_at_end);

    if has_branch_to_end {
        self.emit_instruction("ret", None)?;
    }
    Ok(())
}
/// Returns the byte offset recorded for label `name`, or `None` if it has not been defined.
#[must_use]
pub fn label_offset(&self, name: &str) -> Option<u32> {
self.labels.get(name).copied()
}
pub fn finalize(mut self) -> Result<(Vec<u8>, u16, HashMap<String, u32>)> {
self.ensure_method_terminated()?;
self.optimize_branch_forms()?;
let fixups = std::mem::take(&mut self.fixups);
for fixup in &fixups {
let label_position = self
.labels
.get(&fixup.label)
.ok_or_else(|| Error::UndefinedLabel(fixup.label.clone()))?;
let next_instruction_pos = fixup.fixup_position + fixup.offset_size as usize;
let label_pos_i32 = i32::try_from(*label_position)
.map_err(|_| malformed_error!("Label position exceeds i32 range"))?;
let next_instr_pos_i32 = i32::try_from(next_instruction_pos)
.map_err(|_| malformed_error!("Instruction position exceeds i32 range"))?;
let offset = label_pos_i32 - next_instr_pos_i32;
self.write_branch_offset(offset, fixup)?;
}
let switch_fixups = std::mem::take(&mut self.switch_fixups);
for switch_fixup in &switch_fixups {
let instruction_end_i32 = i32::try_from(switch_fixup.instruction_end_position)
.map_err(|_| malformed_error!("Switch instruction end exceeds i32 range"))?;
for (i, label) in switch_fixup.labels.iter().enumerate() {
let label_position = self
.labels
.get(label)
.ok_or_else(|| Error::UndefinedLabel(label.clone()))?;
let label_pos_i32 = i32::try_from(*label_position)
.map_err(|_| malformed_error!("Label position exceeds i32 range"))?;
let offset = label_pos_i32 - instruction_end_i32;
let target_pos = switch_fixup.fixup_position + i * 4;
let offset_bytes = offset.to_le_bytes();
self.bytecode[target_pos..target_pos + 4].copy_from_slice(&offset_bytes);
}
}
Ok((self.bytecode, self.max_stack_depth, self.labels))
}
/// Repeatedly shrinks long branches to their short forms until none qualify.
///
/// Shrinking moves later code, which can bring further branches into short range, so the
/// search is re-run after every pass. The number of passes is capped at 100.
///
/// # Errors
///
/// Propagates errors from finding or applying shrinkable branches.
fn optimize_branch_forms(&mut self) -> Result<()> {
    const MAX_ITERATIONS: usize = 100;

    let mut passes = 0;
    while passes < MAX_ITERATIONS {
        let shrinkable = self.find_shrinkable_branches()?;
        if shrinkable.is_empty() {
            return Ok(());
        }
        self.apply_branch_shrinking(&shrinkable)?;
        passes += 1;
    }
    Ok(())
}
/// Collects the indices (into `self.fixups`) of long branches that could use a short form.
///
/// A fixup qualifies when it has a 4-byte offset, has a short-form mnemonic, and the
/// distance from the end of the would-be two-byte short instruction to its label fits in
/// a signed byte.
///
/// # Errors
///
/// - `Error::UndefinedLabel` if a candidate branch targets an undefined label.
/// - A malformed-data error if a position does not fit in `i32`.
fn find_shrinkable_branches(&self) -> Result<Vec<usize>> {
    let candidates = self
        .fixups
        .iter()
        .enumerate()
        .filter(|(_, fixup)| fixup.offset_size == 4 && fixup.short_form_mnemonic.is_some());

    let mut shrinkable = Vec::new();
    for (idx, fixup) in candidates {
        let target = self
            .labels
            .get(&fixup.label)
            .copied()
            .ok_or_else(|| Error::UndefinedLabel(fixup.label.clone()))?;
        let target = i32::try_from(target)
            .map_err(|_| malformed_error!("Label position exceeds i32 range"))?;
        // A short branch is two bytes: opcode plus a one-byte offset.
        let short_end = i32::try_from(fixup.instruction_position + 2)
            .map_err(|_| malformed_error!("Instruction position exceeds i32 range"))?;

        if i8::try_from(target - short_end).is_ok() {
            shrinkable.push(idx);
        }
    }
    Ok(shrinkable)
}
/// Rewrites the given long branches as short branches and relocates everything after them.
///
/// `shrinkable` holds indices into `self.fixups`. The bytecode is rebuilt with each
/// selected branch replaced by its short opcode and a single placeholder offset byte;
/// label positions, fixup positions and switch fixup positions are then shifted to match.
///
/// # Errors
///
/// Returns `Error::InvalidMnemonic` if a selected fixup has no short-form mnemonic or the
/// short-form mnemonic is missing from the lookup table.
fn apply_branch_shrinking(&mut self, shrinkable: &[usize]) -> Result<()> {
if shrinkable.is_empty() {
return Ok(());
}
let shrinkable_set: HashSet<usize> = shrinkable.iter().copied().collect();
// `adjustments` holds (old end of a shrunk branch, total shift that applies from there on).
// Each shrunk branch trades a 4-byte offset for a 1-byte one, hence 3 bytes per branch.
let mut adjustments: Vec<(usize, i32)> = Vec::new(); let mut cumulative = 0i32;
for &idx in shrinkable {
let fixup = &self.fixups[idx];
let instr_end = fixup.fixup_position + fixup.offset_size as usize;
cumulative -= 3; adjustments.push((instr_end, cumulative));
}
// Maps an old byte position to its position in the rebuilt bytecode.
// NOTE(review): the early `break` assumes `adjustments` is sorted by ascending threshold,
// i.e. that `shrinkable` lists fixups in ascending position order — confirm with the caller.
let adjust_position = |pos: usize| -> usize {
let mut adj = 0i32;
for &(threshold, cumulative_adj) in &adjustments {
if pos >= threshold {
adj = cumulative_adj;
} else {
break;
}
}
#[allow(
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::cast_sign_loss
)]
let adjusted = (pos as i32 + adj).max(0) as usize;
adjusted
};
// Rebuild the bytecode: copy the bytes between branches verbatim and re-emit each branch.
let mut new_bytecode = Vec::with_capacity(self.bytecode.len());
let mut src_pos = 0usize;
let mut sorted_indices: Vec<usize> = (0..self.fixups.len()).collect();
sorted_indices.sort_by_key(|&i| self.fixups[i].instruction_position);
for &idx in &sorted_indices {
let fixup = &self.fixups[idx];
// Copy whatever lies between the previous branch and this one.
if src_pos < fixup.instruction_position {
new_bytecode.extend_from_slice(&self.bytecode[src_pos..fixup.instruction_position]);
}
src_pos = fixup.instruction_position;
if shrinkable_set.contains(&idx) {
let short_mnemonic = fixup.short_form_mnemonic.ok_or_else(|| {
Error::InvalidMnemonic("missing short form for shrinkable branch".to_string())
})?;
let (opcode, prefix, _) = get_mnemonic_lookup()
.get(short_mnemonic)
.ok_or_else(|| Error::InvalidMnemonic(short_mnemonic.to_string()))?;
if *prefix != 0 {
new_bytecode.push(*prefix);
}
new_bytecode.push(*opcode);
// One placeholder byte for the short offset; the real value is written later.
new_bytecode.push(0);
// Skip the old opcode and its 4-byte offset in the source.
src_pos = fixup.fixup_position + 4;
} else {
// Branch is kept as is: copy it through unchanged.
let instr_end = fixup.fixup_position + fixup.offset_size as usize;
new_bytecode.extend_from_slice(&self.bytecode[src_pos..instr_end]);
src_pos = instr_end;
}
}
// Copy the tail after the last branch.
if src_pos < self.bytecode.len() {
new_bytecode.extend_from_slice(&self.bytecode[src_pos..]);
}
// Relocate every defined label.
for pos in self.labels.values_mut() {
#[allow(clippy::cast_possible_truncation)]
let adjusted_pos = adjust_position(*pos as usize) as u32;
*pos = adjusted_pos;
}
// Relocate every fixup; shrunk ones also become 1-byte fixups with no further short form.
for (idx, fixup) in self.fixups.iter_mut().enumerate() {
let new_instr_pos = adjust_position(fixup.instruction_position);
if shrinkable_set.contains(&idx) {
fixup.instruction_position = new_instr_pos;
// NOTE(review): `+ 1` assumes the short-form opcode is a single byte (no prefix).
fixup.fixup_position = new_instr_pos + 1; fixup.offset_size = 1;
fixup.short_form_mnemonic = None; } else {
fixup.instruction_position = new_instr_pos;
fixup.fixup_position = adjust_position(fixup.fixup_position);
}
}
// Switch jump tables are copied verbatim above, so only their recorded positions move.
for switch_fixup in &mut self.switch_fixups {
switch_fixup.fixup_position = adjust_position(switch_fixup.fixup_position);
switch_fixup.instruction_end_position =
adjust_position(switch_fixup.instruction_end_position);
}
self.bytecode = new_bytecode;
Ok(())
}
fn emit_operand(&mut self, operand: Option<Operand>, expected: OperandType) -> Result<()> {
match expected {
OperandType::None => {
if operand.is_some() {
return Err(Error::UnexpectedOperand);
}
}
OperandType::Int8 => {
if let Some(Operand::Immediate(Immediate::Int8(val))) = operand {
self.bytecode.push(val.to_le_bytes()[0]);
} else {
return Err(Error::WrongOperandType {
expected: "Int8".to_string(),
});
}
}
OperandType::UInt8 => {
if let Some(Operand::Immediate(Immediate::UInt8(val))) = operand {
self.bytecode.push(val);
} else {
return Err(Error::WrongOperandType {
expected: "UInt8".to_string(),
});
}
}
OperandType::Int16 => {
if let Some(Operand::Immediate(Immediate::Int16(val))) = operand {
self.bytecode.extend_from_slice(&val.to_le_bytes());
} else {
return Err(Error::WrongOperandType {
expected: "Int16".to_string(),
});
}
}
OperandType::UInt16 => {
if let Some(Operand::Immediate(Immediate::UInt16(val))) = operand {
self.bytecode.extend_from_slice(&val.to_le_bytes());
} else {
return Err(Error::WrongOperandType {
expected: "UInt16".to_string(),
});
}
}
OperandType::Int32 => {
if let Some(Operand::Immediate(Immediate::Int32(val))) = operand {
self.bytecode.extend_from_slice(&val.to_le_bytes());
} else {
return Err(Error::WrongOperandType {
expected: "Int32".to_string(),
});
}
}
OperandType::UInt32 => {
if let Some(Operand::Immediate(Immediate::UInt32(val))) = operand {
self.bytecode.extend_from_slice(&val.to_le_bytes());
} else {
return Err(Error::WrongOperandType {
expected: "UInt32".to_string(),
});
}
}
OperandType::Int64 => {
if let Some(Operand::Immediate(Immediate::Int64(val))) = operand {
self.bytecode.extend_from_slice(&val.to_le_bytes());
} else {
return Err(Error::WrongOperandType {
expected: "Int64".to_string(),
});
}
}
OperandType::UInt64 => {
if let Some(Operand::Immediate(Immediate::UInt64(val))) = operand {
self.bytecode.extend_from_slice(&val.to_le_bytes());
} else {
return Err(Error::WrongOperandType {
expected: "UInt64".to_string(),
});
}
}
OperandType::Float32 => {
if let Some(Operand::Immediate(Immediate::Float32(val))) = operand {
self.bytecode.extend_from_slice(&val.to_le_bytes());
} else {
return Err(Error::WrongOperandType {
expected: "Float32".to_string(),
});
}
}
OperandType::Float64 => {
if let Some(Operand::Immediate(Immediate::Float64(val))) = operand {
self.bytecode.extend_from_slice(&val.to_le_bytes());
} else {
return Err(Error::WrongOperandType {
expected: "Float64".to_string(),
});
}
}
OperandType::Token => {
if let Some(Operand::Token(token)) = operand {
self.bytecode
.extend_from_slice(&token.value().to_le_bytes());
} else {
return Err(Error::WrongOperandType {
expected: "Token".to_string(),
});
}
}
OperandType::Switch => {
if let Some(Operand::Switch(targets)) = operand {
let targets_len = u32::try_from(targets.len())
.map_err(|_| malformed_error!("Too many switch targets"))?;
self.bytecode.extend_from_slice(&targets_len.to_le_bytes());
for target in targets {
self.bytecode.extend_from_slice(&target.to_le_bytes());
}
} else {
return Err(Error::WrongOperandType {
expected: "Switch".to_string(),
});
}
}
}
Ok(())
}
fn write_branch_offset(&mut self, offset: i32, fixup: &LabelFixup) -> Result<()> {
match fixup.offset_size {
1 => {
if offset < i32::from(i8::MIN) || offset > i32::from(i8::MAX) {
return Err(Error::InvalidBranch(format!(
"offset {offset} out of range for 1-byte instruction"
)));
}
let offset_i8 = i8::try_from(offset)
.map_err(|_| malformed_error!("Branch offset exceeds i8 range"))?;
self.bytecode[fixup.fixup_position] = offset_i8.to_le_bytes()[0];
}
2 => {
if offset < i32::from(i16::MIN) || offset > i32::from(i16::MAX) {
return Err(Error::InvalidBranch(format!(
"offset {offset} out of range for 2-byte instruction"
)));
}
let offset_i16 = i16::try_from(offset)
.map_err(|_| malformed_error!("Branch offset exceeds i16 range"))?;
let bytes = offset_i16.to_le_bytes();
self.bytecode[fixup.fixup_position..fixup.fixup_position + 2]
.copy_from_slice(&bytes);
}
4 => {
let bytes = offset.to_le_bytes();
self.bytecode[fixup.fixup_position..fixup.fixup_position + 4]
.copy_from_slice(&bytes);
}
_ => {
return Err(Error::InvalidBranch(format!(
"invalid offset size: {} bytes",
fixup.offset_size
)))
}
}
Ok(())
}
/// Applies an instruction's stack effect (`pops` removed, `pushes` added) to the tracked
/// depth and raises the recorded maximum if needed.
///
/// In unreachable code an underflow is tolerated and the depth is clamped to zero.
///
/// # Errors
///
/// Returns a malformed-data error if the depth would go negative in reachable code, or
/// if the new depth cannot be represented. The tracked depth is left unchanged when an
/// error is returned.
fn update_stack_depth(&mut self, pops: u8, pushes: u8) -> Result<()> {
    let net_effect = i16::from(pushes) - i16::from(pops);
    // Checked so an extreme depth reports an error instead of overflowing.
    let new_depth = self
        .current_stack_depth
        .checked_add(net_effect)
        .ok_or_else(|| {
            malformed_error!(
                "Stack depth out of range after instruction with {} pops, {} pushes",
                pops,
                pushes
            )
        })?;

    if new_depth < 0 && !self.unreachable {
        return Err(malformed_error!(
            "Stack underflow: depth became {} after instruction with {} pops, {} pushes",
            new_depth,
            pops,
            pushes
        ));
    }

    // Only reachable for unreachable code: clamp instead of failing.
    let new_depth = new_depth.max(0);
    self.current_stack_depth = new_depth;
    // `new_depth` is non-negative here, so its absolute value is the depth itself.
    self.max_stack_depth = self.max_stack_depth.max(new_depth.unsigned_abs());
    Ok(())
}
/// Returns the largest stack depth recorded so far.
#[must_use]
pub fn max_stack_depth(&self) -> u16 {
self.max_stack_depth
}
/// Returns the stack depth tracked at the current end of the bytecode.
#[must_use]
pub fn current_stack_depth(&self) -> i16 {
self.current_stack_depth
}
/// Returns the byte offset recorded for `label_name`, or `None` if the label has not been
/// defined. Equivalent to `label_offset`.
#[must_use]
pub fn get_label_position(&self, label_name: &str) -> Option<u32> {
    self.label_offset(label_name)
}
/// Records the current stack depth as the depth expected at `label`, or checks it against
/// a depth recorded earlier.
///
/// Does nothing while the encoder is in unreachable code.
///
/// # Errors
///
/// Returns a malformed-data error if `label` already expects a different depth.
fn record_label_stack_depth(&mut self, label: &str) -> Result<()> {
    if self.unreachable {
        return Ok(());
    }

    match self.label_stack_depths.get(label).copied() {
        None => {
            self.label_stack_depths
                .insert(label.to_string(), self.current_stack_depth);
        }
        Some(expected) if expected == self.current_stack_depth => {}
        Some(expected) => {
            return Err(malformed_error!(
                "Stack depth mismatch for branch to '{}': expected {}, but branch has {}",
                label,
                expected,
                self.current_stack_depth
            ));
        }
    }
    Ok(())
}
/// Sets (or overwrites) the stack depth expected at `label`, without touching the current
/// or maximum stack depth.
pub fn set_label_stack_depth(&mut self, label: &str, depth: i16) {
self.label_stack_depths.insert(label.to_string(), depth);
}
/// Overrides the current stack depth.
///
/// A non-negative `depth` is also folded into the maximum stack depth; a negative one is
/// stored as the current depth but leaves the maximum untouched.
pub fn set_stack_depth(&mut self, depth: i16) {
    self.current_stack_depth = depth;
    if let Ok(non_negative) = u16::try_from(depth) {
        self.max_stack_depth = self.max_stack_depth.max(non_negative);
    }
}
}
/// `Default` is the same as `InstructionEncoder::new`: an empty encoder.
impl Default for InstructionEncoder {
fn default() -> Self {
Self::new()
}
}
// Unit tests for the instruction encoder and the mnemonic lookup table.
#[cfg(test)]
mod tests {
use super::*;
use crate::assembly::{Immediate, Operand};
// A fresh encoder has no bytecode, labels or fixups.
#[test]
fn test_encoder_creation() {
let encoder = InstructionEncoder::new();
assert!(encoder.bytecode.is_empty());
assert!(encoder.labels.is_empty());
assert!(encoder.fixups.is_empty());
}
// Operand-less instructions encode to their single opcode byte.
#[test]
fn test_simple_instruction_encoding() -> Result<()> {
let mut encoder = InstructionEncoder::new();
encoder.emit_instruction("nop", None)?;
encoder.emit_instruction("ret", None)?;
let (bytecode, _max_stack, _) = encoder.finalize()?;
assert_eq!(bytecode, vec![0x00, 0x2A]);
Ok(())
}
// One-byte operands follow their opcode directly; nothing is appended by `finalize`
// because no branch targets the end of the code.
#[test]
fn test_instruction_with_operands() -> Result<()> {
let mut encoder = InstructionEncoder::new();
encoder.emit_instruction("ldarg.s", Some(Operand::Immediate(Immediate::UInt8(1))))?;
encoder.emit_instruction("ldc.i4.s", Some(Operand::Immediate(Immediate::Int8(42))))?;
let (bytecode, _max_stack, _) = encoder.finalize()?;
assert_eq!(bytecode, vec![0x0E, 0x01, 0x1F, 42]);
Ok(())
}
// A forward `br.s` is patched with the distance from the end of the branch to the label:
// here one byte, the `nop` that is skipped.
#[test]
fn test_label_resolution() -> Result<()> {
let mut encoder = InstructionEncoder::new();
encoder.emit_instruction("nop", None)?; encoder.emit_branch("br.s", "target")?; encoder.emit_instruction("nop", None)?; encoder.define_label("target")?;
encoder.emit_instruction("ret", None)?;
let (bytecode, _max_stack, _) = encoder.finalize()?;
assert_eq!(bytecode, vec![0x00, 0x2B, 0x01, 0x00, 0x2A]);
Ok(())
}
// Unknown mnemonics are rejected.
#[test]
fn test_invalid_mnemonic() {
let mut encoder = InstructionEncoder::new();
let result = encoder.emit_instruction("invalid_instruction", None);
assert!(result.is_err());
}
// An operand of the wrong immediate kind is rejected.
#[test]
fn test_wrong_operand_type() {
let mut encoder = InstructionEncoder::new();
let result =
encoder.emit_instruction("ldarg.s", Some(Operand::Immediate(Immediate::UInt32(1))));
assert!(result.is_err());
}
// Emitting a branch to an unknown label succeeds; the error surfaces in `finalize`.
#[test]
fn test_undefined_label() {
let mut encoder = InstructionEncoder::new();
encoder.emit_branch("br.s", "undefined_label").unwrap();
let result = encoder.finalize();
assert!(result.is_err());
}
// Defining the same label twice is an error.
#[test]
fn test_duplicate_label() -> Result<()> {
let mut encoder = InstructionEncoder::new();
encoder.define_label("test_label")?;
let result = encoder.define_label("test_label");
assert!(result.is_err());
Ok(())
}
// Every named entry of both opcode tables is present in the lookup, and the lookup has
// no extra entries (which also implies no mnemonic is shared between the tables).
#[test]
fn test_reverse_lookup_table_completeness() {
let mut instruction_count = 0;
for instr in INSTRUCTIONS.iter() {
if !instr.instr.is_empty() {
instruction_count += 1;
assert!(get_mnemonic_lookup().contains_key(instr.instr));
}
}
for instr in INSTRUCTIONS_FE.iter() {
if !instr.instr.is_empty() {
instruction_count += 1;
assert!(get_mnemonic_lookup().contains_key(instr.instr));
}
}
assert_eq!(get_mnemonic_lookup().len(), instruction_count);
}
}