Expand description
§Charm
ARM assembler/disassembler auto-generated from the ARM Exploration Tools.
This is a very early version/proof of concept; the API will change in the future. It only handles the general T32/A32/A64 base instructions and has no lexer. Tested against LLVM.
Inspired by https://github.com/icedland/iced.
§Documentation
cargo doc --open
§Tests
cargo test --release
§Examples
§A64
use charm::core::a64::config::*;
use charm::core::a64::consts::*;
use charm::core::a64::decoder::*;
use charm::core::a64::encoder::*;
use charm::core::a64::formatter::*;
use charm::core::a64::instruction::*;
use charm::core::a64::operand::*;
use charm::error::*;
/// A64 walkthrough: decodes a small routine into an instruction block, re-encodes that block at
/// a different base address, and finally formats it as text, asserting the expected result after
/// each of the three steps.
///
/// # Errors
///
/// Propagates any error returned while decoding, building the expected instructions, encoding
/// or formatting.
pub fn main() -> Result<()> {
    // ===========================================================================================
    // Step 1: decoding
    // ===========================================================================================

    // Machine code of the routine, assumed to be located at address 0xdeadbeefdead0000.
    let code = vec![
        // function:
        0xfd, 0x7b, 0x01, 0xa9, // stp x29, x30, [sp, #16]
        0x03, 0x00, 0x40, 0xf9, // ldr x3, [x0]
        0x24, 0x00, 0x40, 0xf9, // ldr x4, [x1]
        0x7f, 0x00, 0x04, 0xeb, // cmp x3, x4
        0x61, 0x00, 0x00, 0x54, // bne not_equal
        0x65, 0xff, 0xff, 0x10, // adr x5, function
        0x03, 0x00, 0x00, 0x14, // b end
        // not_equal:
        0x05, 0x00, 0x80, 0xd2, // mov x5, #0
        0xb3, 0x2f, 0x00, 0x14, // b 0xdeadbeefdeadbeec
        // end:
        0x45, 0x00, 0x00, 0xf9, // str x5, [x2]
        0xfd, 0x7b, 0x41, 0xa9, // ldp x29, x30, [sp, #16]
        0xc0, 0x03, 0x5f, 0xd6, // ret
    ];

    // The decoder is configured to behave like LLVM.
    let config = ConfigLLVM::new();

    // Every instruction is decoded into a single instruction block.
    let mut decoder = Decoder::new(&code, config.clone());
    let mut block = decoder.decode_block(0xdeadbeefdead0000)?;

    // Labels and instructions the decoded block is expected to contain, in order.
    let mut expected_elements: Vec<InstructionBlockElement> = vec![
        // function:
        0xdeadbeefdead0000.into(),
        Instruction::with_4(
            Code::STP_64_ldstpair_off,
            Register::X29,
            Register::X30,
            Register::SP,
            16i32,
        )?
        .into(),
        Instruction::with_3(Code::LDR_64_ldst_pos, Register::X3, Register::X0, 0u32)?.into(),
        Instruction::with_3(Code::LDR_64_ldst_pos, Register::X4, Register::X1, 0u32)?.into(),
        Instruction::with_3(
            Code::CMP_SUBS_64_addsub_shift,
            Register::X3,
            Register::X4,
            Extension::Lsl(0),
        )?
        .into(),
        Instruction::with_2(
            Code::B_only_condbranch,
            Condition::Ne,
            Label::LabelName(0xdeadbeefdead001c),
        )?
        .into(),
        Instruction::with_2(
            Code::ADR_only_pcreladdr,
            Register::X5,
            Label::LabelName(0xdeadbeefdead0000),
        )?
        .into(),
        Instruction::with_1(
            Code::B_only_branch_imm,
            Label::LabelName(0xdeadbeefdead0024),
        )?
        .into(),
        // not_equal:
        0xdeadbeefdead001c.into(),
        Instruction::with_2(Code::MOV_MOVZ_64_movewide, Register::X5, 0i64)?.into(),
        Instruction::with_1(
            Code::B_only_branch_imm,
            Label::LabelName(0xdeadbeefdeadbeec),
        )?
        .into(),
        // end:
        0xdeadbeefdead0024.into(),
        Instruction::with_3(Code::STR_64_ldst_pos, Register::X5, Register::X2, 0u32)?.into(),
        Instruction::with_4(
            Code::LDP_64_ldstpair_off,
            Register::X29,
            Register::X30,
            Register::SP,
            16i32,
        )?
        .into(),
        Instruction::with_1(Code::RET_64R_branch_reg, Register::X30)?.into(),
    ];

    // The hand-built instructions need their address assigned manually, otherwise the
    // comparison with the decoded block below would not succeed. Labels take no space.
    let mut next_pc = 0xdeadbeefdead0000;
    for element in expected_elements.iter_mut() {
        if let InstructionBlockElement::Instruction(instruction) = element {
            instruction.pc = next_pc;
            next_pc += instruction.size as u64;
        }
    }

    let expected_block =
        InstructionBlock::with_instructions(0xdeadbeefdead0000, expected_elements);
    assert_eq!(block, expected_block);

    // ===========================================================================================
    // Step 2: encoding
    // ===========================================================================================

    // Move the block to the address at which it should be re-encoded.
    block.pc = 0xdeadbeefdeed0000;
    let encoder = EncoderBlock::new(vec![block.clone()]);
    let code_blocks = encoder.encode()?;

    // Expected bytes: the original instructions, this time located at 0xdeadbeefdeed0000.
    // Only the branch to 0xdeadbeefdeadbeec differs from the input bytes.
    let expected_code = [
        0xfd, 0x7b, 0x01, 0xa9, //
        0x03, 0x00, 0x40, 0xf9, //
        0x24, 0x00, 0x40, 0xf9, //
        0x7f, 0x00, 0x04, 0xeb, //
        0x61, 0x00, 0x00, 0x54, //
        0x65, 0xff, 0xff, 0x10, //
        0x03, 0x00, 0x00, 0x14, //
        0x05, 0x00, 0x80, 0xd2, //
        0xb3, 0x2f, 0xf0, 0x17, //
        0x45, 0x00, 0x00, 0xf9, //
        0xfd, 0x7b, 0x41, 0xa9, //
        0xc0, 0x03, 0x5f, 0xd6, //
    ];
    assert_eq!(expected_code, code_blocks[0].data[..]);

    // ===========================================================================================
    // Step 3: formatting
    // ===========================================================================================

    let mut output = String::new();
    let mut formatter = Fmt {};
    let mut config = ConfigLLVM::new();

    // Settings exist both globally and per instruction. For the branch and `adr` instructions
    // we switch the integer format so that label names are printed as hexadecimal numbers
    // instead of decimal ones.
    {
        let syntax = &mut config.instructions.b_only_condbranch.syntax;
        syntax.positive_integer_format = Some(FormatInteger::HexadecimalUnsigned);
        syntax.negative_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    }
    {
        let syntax = &mut config.instructions.b_only_branch_imm.syntax;
        syntax.positive_integer_format = Some(FormatInteger::HexadecimalUnsigned);
        syntax.negative_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    }
    {
        let syntax = &mut config.instructions.adr_only_pcreladdr.syntax;
        syntax.positive_integer_format = Some(FormatInteger::HexadecimalUnsigned);
        syntax.negative_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    }

    // Each instruction is formatted on its own line; labels produce no output.
    for element in block.instructions {
        if let InstructionBlockElement::Instruction(instruction) = element {
            formatter.format(&mut output, &config, &instruction)?;
            output.push('\n');
        }
    }

    // The expected listing.
    assert_eq!(
        output,
        concat!(
            "stp x29, x30, [sp, #16]\n",
            "ldr x3, [x0]\n",
            "ldr x4, [x1]\n",
            "cmp x3, x4\n",
            "b.ne #0xdeadbeefdead001c\n",
            "adr x5, #0xdeadbeefdead0000\n",
            "b #0xdeadbeefdead0024\n",
            "mov x5, #0\n",
            "b #0xdeadbeefdeadbeec\n",
            "str x5, [x2]\n",
            "ldp x29, x30, [sp, #16]\n",
            "ret\n",
        )
    );

    Ok(())
}
§A32
use charm::core::a32::config::*;
use charm::core::a32::consts::*;
use charm::core::a32::decoder::*;
use charm::core::a32::encoder::*;
use charm::core::a32::formatter::*;
use charm::core::a32::instruction::*;
use charm::core::a32::operand::*;
use charm::error::*;
/// A32 walkthrough: decodes a small routine into an instruction block, re-encodes that block at
/// a different base address, and finally formats it as text, asserting the expected result after
/// each of the three steps.
///
/// # Errors
///
/// Propagates any error returned while decoding, building the expected instructions, encoding
/// or formatting.
pub fn main() -> Result<()> {
    // -------------------------------------------------------------------------------------------
    // Decoding
    // -------------------------------------------------------------------------------------------
    // Original instructions to decode at address 0xdead0000.
    let code = vec![
        // function:
        0x04, 0xe0, 0x0d, 0xe5, // str lr, [sp, #-4]
        0x00, 0x30, 0x90, 0xe5, // ldr r3, [r0]
        0x00, 0x40, 0x91, 0xe5, // ldr r4, [r1]
        0x04, 0x00, 0x53, 0xe1, // cmp r3, r4
        0x00, 0x00, 0x00, 0x0a, // beq equal
        // end:
        0xb4, 0x2f, 0x00, 0xea, // b 0xdeadbeec
        // equal:
        0x20, 0x50, 0x4f, 0xe2, // adr r5, function
        0x00, 0x50, 0x82, 0xe5, // str r5, [r2]
        0xfb, 0xff, 0xff, 0xea, // b end
    ];
    // Configuring the decoder to behave like LLVM.
    let config = ConfigLLVM::new();
    // We decode all the instructions into an instruction block.
    let mut decoder = Decoder::new(&code, config.clone());
    let mut block = decoder.decode_block(0xdead0000)?;
    // The instructions/labels expected in the block.
    let mut expected_instructions: Vec<InstructionBlockElement> = vec![
        // `str lr, [sp, #-4]` is encoded as 0xe50de004 (P = 1, W = 0), i.e. the plain offset
        // form without write-back, hence the `_off` code rather than the pre-indexed one.
        Instruction::with_5(
            Code::STR_i_A1_off,
            MnemonicCondition::Al,
            Register::R14,
            Register::R13,
            PlusMinus::Minus,
            4u32,
        )?
        .into(),
        Instruction::with_5(
            Code::LDR_i_A1_off,
            MnemonicCondition::Al,
            Register::R3,
            Register::R0,
            PlusMinus::Plus,
            0u32,
        )?
        .into(),
        Instruction::with_5(
            Code::LDR_i_A1_off,
            MnemonicCondition::Al,
            Register::R4,
            Register::R1,
            PlusMinus::Plus,
            0u32,
        )?
        .into(),
        Instruction::with_3(
            Code::CMP_r_A1,
            MnemonicCondition::Al,
            Register::R3,
            Register::R4,
        )?
        .into(),
        Instruction::with_2(
            Code::B_A1,
            MnemonicCondition::Eq,
            Label::LabelName(0xdead0018),
        )?
        .into(),
        // end:
        0xdead0014.into(),
        Instruction::with_2(
            Code::B_A1,
            MnemonicCondition::Al,
            Label::LabelName(0xdeadbeec),
        )?
        .into(),
        // equal:
        0xdead0018.into(),
        // `adr r5, function` is expected as a PC-relative `sub` with a modified immediate.
        Instruction::with_4(
            Code::SUB_ADR_A2,
            MnemonicCondition::Al,
            Register::R5,
            Register::R15,
            ModifiedImmediate(0, 32),
        )?
        .into(),
        Instruction::with_5(
            Code::STR_i_A1_off,
            MnemonicCondition::Al,
            Register::R5,
            Register::R2,
            PlusMinus::Plus,
            0u32,
        )?
        .into(),
        Instruction::with_2(
            Code::B_A1,
            MnemonicCondition::Al,
            Label::LabelName(0xdead0014),
        )?
        .into(),
    ];
    // Here we need to manually update the address of each instruction to make the assert
    // succeed. Labels do not advance the address.
    let mut pc = 0xdead0000;
    for element in &mut expected_instructions {
        if let InstructionBlockElement::Instruction(i) = element {
            i.pc = pc;
            pc += i.size as u32;
        }
    }
    let expected_block = InstructionBlock::with_instructions(0xdead0000, expected_instructions);
    assert_eq!(block, expected_block);
    // -------------------------------------------------------------------------------------------
    // Encoding
    // -------------------------------------------------------------------------------------------
    // The new address where we want to re-encode our block.
    block.pc = 0xdeed0000;
    let encoder_block = EncoderBlock::new(vec![block.clone()]);
    let code_blocks = encoder_block.encode()?;
    // The expected output, which is the original instructions but at address 0xdeed0000.
    // Only the branch to 0xdeadbeec differs from the input bytes.
    let expected_code = [
        0x04, 0xe0, 0x0d, 0xe5, 0x00, 0x30, 0x90, 0xe5, 0x00, 0x40, 0x91, 0xe5, 0x04, 0x00, 0x53,
        0xe1, 0x00, 0x00, 0x00, 0x0a, 0xb4, 0x2f, 0xf0, 0xea, 0x20, 0x50, 0x4f, 0xe2, 0x00, 0x50,
        0x82, 0xe5, 0xfb, 0xff, 0xff, 0xea,
    ];
    assert_eq!(expected_code, code_blocks[0].data[..]);
    // -------------------------------------------------------------------------------------------
    // Formatting
    // -------------------------------------------------------------------------------------------
    let mut output = String::new();
    let mut formatter = Fmt {};
    let mut config = ConfigLLVM::new();
    // Settings can be applied globally and per-instruction.
    // Here we change the output of the branch instruction to format label names as hexadecimal
    // numbers instead of decimal ones. Both the positive and the negative integer formats are
    // set, as in the A64 and T32 examples.
    config.instructions.b_a1.syntax.positive_integer_format =
        Some(FormatInteger::HexadecimalUnsigned);
    config.instructions.b_a1.syntax.negative_integer_format =
        Some(FormatInteger::HexadecimalUnsigned);
    // We format each instruction and separate them with a line return. Labels print nothing.
    for element in block.instructions {
        if let InstructionBlockElement::Instruction(i) = element {
            formatter.format(&mut output, &config, &i)?;
            output.push('\n');
        }
    }
    // The expected output.
    assert_eq!(
        output,
        concat!(
            "str lr, [sp, #-4]\n",
            "ldr r3, [r0]\n",
            "ldr r4, [r1]\n",
            "cmp r3, r4\n",
            "beq #0xdead0018\n",
            "b #0xdeadbeec\n",
            "sub r5, pc, #32\n",
            "str r5, [r2]\n",
            "b #0xdead0014\n",
        )
    );
    Ok(())
}
§T32
use charm::core::t32::config::*;
use charm::core::t32::consts::*;
use charm::core::t32::decoder::*;
use charm::core::t32::encoder::*;
use charm::core::t32::formatter::*;
use charm::core::t32::instruction::*;
use charm::core::t32::operand::*;
use charm::error::*;
/// T32 walkthrough: decodes a small routine into an instruction block, re-encodes that block at
/// a different base address, and finally formats it as text, asserting the expected result after
/// each of the three steps.
///
/// # Errors
///
/// Propagates any error returned while decoding, building the expected instructions, encoding
/// or formatting.
pub fn main() -> Result<()> {
    // ===========================================================================================
    // Step 1: decoding
    // ===========================================================================================

    // Machine code of the routine, assumed to be located at address 0xdead0000.
    let code = vec![
        // function:
        0x4d, 0xf8, 0x04, 0xec, // str lr, [sp, #-4]
        0x03, 0x68, // ldr r3, [r0]
        0x0c, 0x68, // ldr r4, [r1]
        0xa3, 0x42, // cmp r3, r4
        0x0e, 0xbf, // itee eq
        0xaf, 0xf2, 0x10, 0x05, // adreq r5, function
        0x00, 0x25, // movne r5, #0
        0x00, 0xf0, 0x7c, 0xbe, // bne 0xdead0d0e
        0x5d, 0xf8, 0x04, 0xfc, // ldr pc, [sp, #-4]
    ];

    // The decoder is configured to behave like LLVM.
    let config = ConfigLLVM::new();

    // Every instruction is decoded into a single instruction block.
    let mut decoder = Decoder::new(&code, config.clone());
    let mut block = decoder.decode_block(0xdead0000)?;

    // Labels and instructions the decoded block is expected to contain, in order.
    let mut expected_elements: Vec<InstructionBlockElement> = vec![
        // function:
        0xdead0000.into(),
        Instruction::with_4(
            Code::STR_i_T4_off,
            MnemonicCondition::Al,
            Register::R14,
            Register::R13,
            4u32,
        )?
        .into(),
        Instruction::with_4(
            Code::LDR_i_T1,
            MnemonicCondition::Al,
            Register::R3,
            Register::R0,
            0u32,
        )?
        .into(),
        Instruction::with_4(
            Code::LDR_i_T1,
            MnemonicCondition::Al,
            Register::R4,
            Register::R1,
            0u32,
        )?
        .into(),
        Instruction::with_3(
            Code::CMP_r_T1,
            MnemonicCondition::Al,
            Register::R3,
            Register::R4,
        )?
        .into(),
        Instruction::with_4(
            Code::IT_T1,
            ItCondition::Else,
            ItCondition::Else,
            ItCondition::None,
            Condition::Eq,
        )?
        .into(),
        Instruction::with_3(
            Code::ADR_T2,
            MnemonicCondition::Eq,
            Register::R5,
            Label::LabelName(0xdead0000),
        )?
        .into(),
        Instruction::with_encoding_3(
            Code::MOV_i_T1,
            Encoding::Alt1,
            MnemonicCondition::Ne,
            Register::R5,
            0u32,
        )?
        .into(),
        Instruction::with_encoding_2(
            Code::B_T4,
            Encoding::Alt1,
            MnemonicCondition::Ne,
            Label::LabelName(0xdead0d0e),
        )?
        .into(),
        Instruction::with_4(
            Code::LDR_i_T4_off,
            MnemonicCondition::Al,
            Register::R15,
            Register::R13,
            4u32,
        )?
        .into(),
    ];

    // The hand-built instructions need their address assigned manually, otherwise the
    // comparison with the decoded block below would not succeed. Labels take no space.
    let mut next_pc = 0xdead0000;
    for element in expected_elements.iter_mut() {
        if let InstructionBlockElement::Instruction(instruction) = element {
            instruction.pc = next_pc;
            next_pc += instruction.size as u32;
        }
    }

    let expected_block = InstructionBlock::with_instructions(0xdead0000, expected_elements);
    assert_eq!(block, expected_block);

    // ===========================================================================================
    // Step 2: encoding
    // ===========================================================================================

    // Move the block to the address at which it should be re-encoded.
    block.pc = 0xdeed0000;
    let encoder = EncoderBlock::new(vec![block.clone()]);
    let code_blocks = encoder.encode()?;

    // Expected bytes: the original instructions, this time located at 0xdeed0000.
    // Only the branch to 0xdead0d0e differs from the input bytes.
    let expected_code = [
        0x4d, 0xf8, 0x04, 0xec, //
        0x03, 0x68, //
        0x0c, 0x68, //
        0xa3, 0x42, //
        0x0e, 0xbf, //
        0xaf, 0xf2, 0x10, 0x05, //
        0x00, 0x25, //
        0x00, 0xf4, 0x7c, 0xbe, //
        0x5d, 0xf8, 0x04, 0xfc, //
    ];
    assert_eq!(expected_code, code_blocks[0].data[..]);

    // ===========================================================================================
    // Step 3: formatting
    // ===========================================================================================

    let mut output = String::new();
    let mut formatter = Fmt {};
    let mut config = ConfigLLVM::new();

    // Settings exist both globally and per instruction. For the branch and `adr` instructions
    // we switch the integer format so that label names are printed as hexadecimal numbers
    // instead of decimal ones.
    {
        let syntax = &mut config.instructions.b_t4.syntax;
        syntax.positive_integer_format = Some(FormatInteger::HexadecimalUnsigned);
        syntax.negative_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    }
    {
        let syntax = &mut config.instructions.adr_t2.syntax;
        syntax.positive_integer_format = Some(FormatInteger::HexadecimalUnsigned);
        syntax.negative_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    }

    // Each instruction is formatted on its own line; labels produce no output.
    for element in block.instructions {
        if let InstructionBlockElement::Instruction(instruction) = element {
            formatter.format(&mut output, &config, &instruction)?;
            output.push('\n');
        }
    }

    // The expected listing.
    assert_eq!(
        output,
        concat!(
            "str lr, [sp, #-4]\n",
            "ldr r3, [r0]\n",
            "ldr r4, [r1]\n",
            "cmp r3, r4\n",
            "itee eq\n",
            "adreq.w r5, #0xdead0000\n",
            "movne r5, #0\n",
            "bne.w #0xdead0d0e\n",
            "ldr pc, [sp, #-4]\n",
        )
    );

    Ok(())
}
§To-Do
- Encoder/Decoder
- Lexer
- System registers
- ARM features
- ARM pseudocode parser (extract information from decode/operation pseudocode)
- Remaining instruction types (system, simd/fp, sve, sme, etc.)
- Errors
- Serde
- Generator
- Code refactor
- Tests
- Test against additional disassemblers
- Fuzzing
- Benchmark
- CI
- Misc
- Documentation