charm 0.0.1

ARM assembler & disassembler generated from the ARM exploration tools.
Documentation
  • Coverage
  • 77.33%
    34398 out of 44480 items documented; 1 out of 33888 items with examples
  • Size
  • Source code size: 45.22 MB This is the summed size of all the files inside the crates.io package for this release.
  • Documentation size: 2.41 GB This is the summed size of all files generated by rustdoc for all configured targets
  • Ø build duration
  • this release: 8m 12s Average build duration of successful builds.
  • all releases: 1m 30s Average build duration of successful builds in releases after 2024-10-23.
  • Links
  • Homepage
  • 0xLyte/charm
    0 0 0
  • crates.io
  • Dependencies
  • Versions
  • Owners
  • 0xLyte

Charm

ARM assembler/disassembler auto-generated from the ARM Exploration Tools.

Very early version/POC; the API will change in the future. It only handles the general T32/A32/A64 base instructions and has no lexer. Tested against LLVM.

Inspired by https://github.com/icedland/iced.

Documentation

cargo doc --open

Tests

cargo test --release

Examples

A64

use charm::core::a64::config::*;
use charm::core::a64::consts::*;
use charm::core::a64::decoder::*;
use charm::core::a64::encoder::*;
use charm::core::a64::formatter::*;
use charm::core::a64::instruction::*;
use charm::core::a64::operand::*;
use charm::error::*;

/// End-to-end A64 example: decodes a small function, compares the result with a hand-built
/// instruction block, re-encodes the block at a different base address and finally formats
/// every instruction as text.
///
/// # Errors
///
/// Propagates (via `?`) any error returned by the decoding, instruction-construction, encoding
/// or formatting calls.
pub fn main() -> Result<()> {
    // -------------------------------------------------------------------------------------------
    // Decoding
    // -------------------------------------------------------------------------------------------

    // Original instructions to decode at address 0xdeadbeefdead0000.
    let code = vec![
        // function:
        0xfd, 0x7b, 0x01, 0xa9, // stp x29, x30, [sp, #16]
        0x03, 0x00, 0x40, 0xf9, // ldr x3, [x0]
        0x24, 0x00, 0x40, 0xf9, // ldr x4, [x1]
        0x7f, 0x00, 0x04, 0xeb, // cmp x3, x4
        0x61, 0x00, 0x00, 0x54, // bne not_equal
        0x65, 0xff, 0xff, 0x10, // adr x5, function
        0x03, 0x00, 0x00, 0x14, // b end
        // not_equal:
        0x05, 0x00, 0x80, 0xd2, // mov x5, #0
        0xb3, 0x2f, 0x00, 0x14, // b 0xdeadbeefdeadbeec
        // end:
        0x45, 0x00, 0x00, 0xf9, // str x5, [x2]
        0xfd, 0x7b, 0x41, 0xa9, // ldp x29, x30, [sp, #16]
        0xc0, 0x03, 0x5f, 0xd6, // ret
    ];

    // Configuring the decoder to behave like LLVM.
    let config = ConfigLLVM::new();

    // We decode all the instructions into an instruction block.
    // This configuration is not reused afterwards, so it is moved into the decoder.
    let mut decoder = Decoder::new(&code, config);
    let mut block = decoder.decode_block(0xdeadbeefdead0000)?;

    // The instructions/labels expected in the block.
    // A label element precedes every instruction that is the target of a label operand.
    let mut expected_instructions: Vec<InstructionBlockElement> = vec![
        0xdeadbeefdead0000.into(),
        Instruction::with_4(
            Code::STP_64_ldstpair_off,
            Register::X29,
            Register::X30,
            Register::SP,
            16i32,
        )?
        .into(),
        Instruction::with_3(Code::LDR_64_ldst_pos, Register::X3, Register::X0, 0u32)?.into(),
        Instruction::with_3(Code::LDR_64_ldst_pos, Register::X4, Register::X1, 0u32)?.into(),
        Instruction::with_3(
            Code::CMP_SUBS_64_addsub_shift,
            Register::X3,
            Register::X4,
            Extension::Lsl(0),
        )?
        .into(),
        Instruction::with_2(
            Code::B_only_condbranch,
            Condition::Ne,
            Label::LabelName(0xdeadbeefdead001c),
        )?
        .into(),
        Instruction::with_2(
            Code::ADR_only_pcreladdr,
            Register::X5,
            Label::LabelName(0xdeadbeefdead0000),
        )?
        .into(),
        Instruction::with_1(
            Code::B_only_branch_imm,
            Label::LabelName(0xdeadbeefdead0024),
        )?
        .into(),
        0xdeadbeefdead001c.into(),
        Instruction::with_2(Code::MOV_MOVZ_64_movewide, Register::X5, 0i64)?.into(),
        Instruction::with_1(
            Code::B_only_branch_imm,
            Label::LabelName(0xdeadbeefdeadbeec),
        )?
        .into(),
        0xdeadbeefdead0024.into(),
        Instruction::with_3(Code::STR_64_ldst_pos, Register::X5, Register::X2, 0u32)?.into(),
        Instruction::with_4(
            Code::LDP_64_ldstpair_off,
            Register::X29,
            Register::X30,
            Register::SP,
            16i32,
        )?
        .into(),
        Instruction::with_1(Code::RET_64R_branch_reg, Register::X30)?.into(),
    ];
    // The hand-built instructions do not carry their address yet: assign each one its `pc`
    // manually so that the comparison with the decoded block succeeds. Labels are skipped.
    let mut pc = 0xdeadbeefdead0000;
    for element in &mut expected_instructions {
        match element {
            InstructionBlockElement::Label(_) => {}
            InstructionBlockElement::Instruction(i) => {
                i.pc = pc;
                pc += i.size as u64;
            }
        }
    }
    let expected_block =
        InstructionBlock::with_instructions(0xdeadbeefdead0000, expected_instructions);
    assert_eq!(block, expected_block);

    // -------------------------------------------------------------------------------------------
    // Encoding
    // -------------------------------------------------------------------------------------------

    // The new address where we want to re-encode our block.
    block.pc = 0xdeadbeefdeed0000;
    let encoder_block = EncoderBlock::new(vec![block.clone()]);
    let code_blocks = encoder_block.encode()?;

    // The expected output: the original instructions re-encoded for address 0xdeadbeefdeed0000.
    // Only the branch to 0xdeadbeefdeadbeec changes (`0xb3, 0x2f, 0xf0, 0x17`): its target lies
    // outside the block, so its PC-relative offset depends on the block address.
    let expected_code = [
        0xfd, 0x7b, 0x01, 0xa9, 0x03, 0x00, 0x40, 0xf9, 0x24, 0x00, 0x40, 0xf9, 0x7f, 0x00, 0x04,
        0xeb, 0x61, 0x00, 0x00, 0x54, 0x65, 0xff, 0xff, 0x10, 0x03, 0x00, 0x00, 0x14, 0x05, 0x00,
        0x80, 0xd2, 0xb3, 0x2f, 0xf0, 0x17, 0x45, 0x00, 0x00, 0xf9, 0xfd, 0x7b, 0x41, 0xa9, 0xc0,
        0x03, 0x5f, 0xd6,
    ];
    assert_eq!(expected_code, code_blocks[0].data[..]);

    // -------------------------------------------------------------------------------------------
    // Formatting
    // -------------------------------------------------------------------------------------------

    let mut output = String::new();
    let mut formatter = Fmt {};
    let mut config = ConfigLLVM::new();

    // Settings can be applied globally and per-instruction.
    // Here we change the output of the branch and `adr` instructions to format label names as
    // hexadecimal numbers instead of decimal ones.
    config
        .instructions
        .b_only_condbranch
        .syntax
        .positive_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    config
        .instructions
        .b_only_condbranch
        .syntax
        .negative_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    config
        .instructions
        .b_only_branch_imm
        .syntax
        .positive_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    config
        .instructions
        .b_only_branch_imm
        .syntax
        .negative_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    config
        .instructions
        .adr_only_pcreladdr
        .syntax
        .positive_integer_format = Some(FormatInteger::HexadecimalUnsigned);
    config
        .instructions
        .adr_only_pcreladdr
        .syntax
        .negative_integer_format = Some(FormatInteger::HexadecimalUnsigned);

    // We format each instruction and terminate it with a newline; labels are not printed.
    for element in block.instructions {
        match element {
            InstructionBlockElement::Label(_) => {}
            InstructionBlockElement::Instruction(i) => {
                formatter.format(&mut output, &config, &i)?;
                output.push('\n');
            }
        }
    }

    // The expected output.
    assert_eq!(
        output,
        "stp x29, x30, [sp, #16]\n\
        ldr x3, [x0]\n\
        ldr x4, [x1]\n\
        cmp x3, x4\n\
        b.ne #0xdeadbeefdead001c\n\
        adr x5, #0xdeadbeefdead0000\n\
        b #0xdeadbeefdead0024\n\
        mov x5, #0\n\
        b #0xdeadbeefdeadbeec\n\
        str x5, [x2]\n\
        ldp x29, x30, [sp, #16]\n\
        ret\n"
    );

    Ok(())
}

A32

use charm::core::a32::config::*;
use charm::core::a32::consts::*;
use charm::core::a32::decoder::*;
use charm::core::a32::encoder::*;
use charm::core::a32::formatter::*;
use charm::core::a32::instruction::*;
use charm::core::a32::operand::*;
use charm::error::*;

/// End-to-end A32 example: decodes a small function, compares the result with a hand-built
/// instruction block, re-encodes the block at a different base address and finally formats
/// every instruction as text.
///
/// # Errors
///
/// Propagates (via `?`) any error returned by the decoding, instruction-construction, encoding
/// or formatting calls.
pub fn main() -> Result<()> {
    // -------------------------------------------------------------------------------------------
    // Decoding
    // -------------------------------------------------------------------------------------------

    // Original instructions to decode at address 0xdead0000.
    let code = vec![
        // function:
        0x04, 0xe0, 0x0d, 0xe5, // str lr, [sp, #-4]
        0x00, 0x30, 0x90, 0xe5, // ldr r3, [r0]
        0x00, 0x40, 0x91, 0xe5, // ldr r4, [r1]
        0x04, 0x00, 0x53, 0xe1, // cmp r3, r4
        0x00, 0x00, 0x00, 0x0a, // beq equal
        // end:
        0xb4, 0x2f, 0x00, 0xea, // b 0xdeadbeec
        // equal:
        0x20, 0x50, 0x4f, 0xe2, // adr r5, function
        0x00, 0x50, 0x82, 0xe5, // str r5, [r2]
        0xfb, 0xff, 0xff, 0xea, // b end
    ];

    // Configuring the decoder to behave like LLVM.
    let config = ConfigLLVM::new();

    // We decode all the instructions into an instruction block.
    // This configuration is not reused afterwards, so it is moved into the decoder.
    let mut decoder = Decoder::new(&code, config);
    let mut block = decoder.decode_block(0xdead0000)?;

    // The instructions/labels expected in the block.
    // A label element precedes every instruction that is the target of a label operand.
    let mut expected_instructions: Vec<InstructionBlockElement> = vec![
        // `str lr, [sp, #-4]` is encoded as 0xe50de004 (P = 1, W = 0): plain offset addressing
        // without write-back, hence the `_off` variant rather than `_pre`.
        Instruction::with_5(
            Code::STR_i_A1_off,
            MnemonicCondition::Al,
            Register::R14,
            Register::R13,
            PlusMinus::Minus,
            4u32,
        )?
        .into(),
        Instruction::with_5(
            Code::LDR_i_A1_off,
            MnemonicCondition::Al,
            Register::R3,
            Register::R0,
            PlusMinus::Plus,
            0u32,
        )?
        .into(),
        Instruction::with_5(
            Code::LDR_i_A1_off,
            MnemonicCondition::Al,
            Register::R4,
            Register::R1,
            PlusMinus::Plus,
            0u32,
        )?
        .into(),
        Instruction::with_3(
            Code::CMP_r_A1,
            MnemonicCondition::Al,
            Register::R3,
            Register::R4,
        )?
        .into(),
        Instruction::with_2(
            Code::B_A1,
            MnemonicCondition::Eq,
            Label::LabelName(0xdead0018),
        )?
        .into(),
        0xdead0014.into(),
        Instruction::with_2(
            Code::B_A1,
            MnemonicCondition::Al,
            Label::LabelName(0xdeadbeec),
        )?
        .into(),
        0xdead0018.into(),
        // `adr r5, function` is represented as `sub r5, pc, #32` (see the formatted output
        // below), so it carries an immediate instead of a label.
        Instruction::with_4(
            Code::SUB_ADR_A2,
            MnemonicCondition::Al,
            Register::R5,
            Register::R15,
            ModifiedImmediate(0, 32),
        )?
        .into(),
        Instruction::with_5(
            Code::STR_i_A1_off,
            MnemonicCondition::Al,
            Register::R5,
            Register::R2,
            PlusMinus::Plus,
            0u32,
        )?
        .into(),
        Instruction::with_2(
            Code::B_A1,
            MnemonicCondition::Al,
            Label::LabelName(0xdead0014),
        )?
        .into(),
    ];
    // The hand-built instructions do not carry their address yet: assign each one its `pc`
    // manually so that the comparison with the decoded block succeeds. Labels are skipped.
    let mut pc = 0xdead0000;
    for element in &mut expected_instructions {
        match element {
            InstructionBlockElement::Label(_) => {}
            InstructionBlockElement::Instruction(i) => {
                i.pc = pc;
                pc += i.size as u32;
            }
        }
    }
    let expected_block = InstructionBlock::with_instructions(0xdead0000, expected_instructions);
    assert_eq!(block, expected_block);

    // -------------------------------------------------------------------------------------------
    // Encoding
    // -------------------------------------------------------------------------------------------

    // The new address where we want to re-encode our block.
    block.pc = 0xdeed0000;
    let encoder_block = EncoderBlock::new(vec![block.clone()]);
    let code_blocks = encoder_block.encode()?;

    // The expected output: the original instructions re-encoded for address 0xdeed0000.
    // Only the branch to 0xdeadbeec changes (`0xb4, 0x2f, 0xf0, 0xea`): its target lies outside
    // the block, so its PC-relative offset depends on the block address.
    let expected_code = [
        0x04, 0xe0, 0x0d, 0xe5, 0x00, 0x30, 0x90, 0xe5, 0x00, 0x40, 0x91, 0xe5, 0x04, 0x00, 0x53,
        0xe1, 0x00, 0x00, 0x00, 0x0a, 0xb4, 0x2f, 0xf0, 0xea, 0x20, 0x50, 0x4f, 0xe2, 0x00, 0x50,
        0x82, 0xe5, 0xfb, 0xff, 0xff, 0xea,
    ];
    assert_eq!(expected_code, code_blocks[0].data[..]);

    // -------------------------------------------------------------------------------------------
    // Formatting
    // -------------------------------------------------------------------------------------------

    let mut output = String::new();
    let mut formatter = Fmt {};
    let mut config = ConfigLLVM::new();

    // Settings can be applied globally and per-instruction.
    // Here we change the output of the branch instruction to format label names as hexadecimal
    // numbers instead of decimal ones.
    config.instructions.b_a1.syntax.positive_integer_format =
        Some(FormatInteger::HexadecimalUnsigned);

    // We format each instruction and terminate it with a newline; labels are not printed.
    for element in block.instructions {
        match element {
            InstructionBlockElement::Label(_) => {}
            InstructionBlockElement::Instruction(i) => {
                formatter.format(&mut output, &config, &i)?;
                output.push('\n');
            }
        }
    }

    // The expected output.
    assert_eq!(
        output,
        "str lr, [sp, #-4]\n\
        ldr r3, [r0]\n\
        ldr r4, [r1]\n\
        cmp r3, r4\n\
        beq #0xdead0018\n\
        b #0xdeadbeec\n\
        sub r5, pc, #32\n\
        str r5, [r2]\n\
        b #0xdead0014\n"
    );

    Ok(())
}

T32

use charm::core::t32::config::*;
use charm::core::t32::consts::*;
use charm::core::t32::decoder::*;
use charm::core::t32::encoder::*;
use charm::core::t32::formatter::*;
use charm::core::t32::instruction::*;
use charm::core::t32::operand::*;
use charm::error::*;

/// End-to-end T32 example: decodes a small function, compares the result with a hand-built
/// instruction block, re-encodes the block at a different base address and finally formats
/// every instruction as text.
///
/// # Errors
///
/// Propagates (via `?`) any error returned by the decoding, instruction-construction, encoding
/// or formatting calls.
pub fn main() -> Result<()> {
    // -------------------------------------------------------------------------------------------
    // Decoding
    // -------------------------------------------------------------------------------------------

    // Original instructions to decode at address 0xdead0000.
    let code = vec![
        // function:
        0x4d, 0xf8, 0x04, 0xec, // str lr, [sp, #-4]
        0x03, 0x68, // ldr r3, [r0]
        0x0c, 0x68, // ldr r4, [r1]
        0xa3, 0x42, // cmp r3, r4
        0x0e, 0xbf, // itee eq
        0xaf, 0xf2, 0x10, 0x05, // adreq r5, function
        0x00, 0x25, // movne r5, #0
        0x00, 0xf0, 0x7c, 0xbe, // bne 0xdead0d0e
        0x5d, 0xf8, 0x04, 0xfc, // ldr pc, [sp, #-4]
    ];

    // Configuring the decoder to behave like LLVM.
    let config = ConfigLLVM::new();

    // We decode all the instructions into an instruction block.
    // This configuration is not reused afterwards, so it is moved into the decoder.
    let mut decoder = Decoder::new(&code, config);
    let mut block = decoder.decode_block(0xdead0000)?;

    // The instructions/labels expected in the block.
    // A label element precedes every instruction that is the target of a label operand.
    let mut expected_instructions: Vec<InstructionBlockElement> = vec![
        0xdead0000.into(),
        Instruction::with_4(
            Code::STR_i_T4_off,
            MnemonicCondition::Al,
            Register::R14,
            Register::R13,
            4u32,
        )?
        .into(),
        Instruction::with_4(
            Code::LDR_i_T1,
            MnemonicCondition::Al,
            Register::R3,
            Register::R0,
            0u32,
        )?
        .into(),
        Instruction::with_4(
            Code::LDR_i_T1,
            MnemonicCondition::Al,
            Register::R4,
            Register::R1,
            0u32,
        )?
        .into(),
        Instruction::with_3(
            Code::CMP_r_T1,
            MnemonicCondition::Al,
            Register::R3,
            Register::R4,
        )?
        .into(),
        // `itee eq`: the three following instructions are conditional (eq, ne, ne).
        Instruction::with_4(
            Code::IT_T1,
            ItCondition::Else,
            ItCondition::Else,
            ItCondition::None,
            Condition::Eq,
        )?
        .into(),
        Instruction::with_3(
            Code::ADR_T2,
            MnemonicCondition::Eq,
            Register::R5,
            Label::LabelName(0xdead0000),
        )?
        .into(),
        Instruction::with_encoding_3(
            Code::MOV_i_T1,
            Encoding::Alt1,
            MnemonicCondition::Ne,
            Register::R5,
            0u32,
        )?
        .into(),
        Instruction::with_encoding_2(
            Code::B_T4,
            Encoding::Alt1,
            MnemonicCondition::Ne,
            Label::LabelName(0xdead0d0e),
        )?
        .into(),
        Instruction::with_4(
            Code::LDR_i_T4_off,
            MnemonicCondition::Al,
            Register::R15,
            Register::R13,
            4u32,
        )?
        .into(),
    ];
    // The hand-built instructions do not carry their address yet: assign each one its `pc`
    // manually so that the comparison with the decoded block succeeds. Labels are skipped.
    // T32 mixes 2-byte and 4-byte encodings, so the step comes from each instruction's `size`.
    let mut pc = 0xdead0000;
    for element in &mut expected_instructions {
        match element {
            InstructionBlockElement::Label(_) => {}
            InstructionBlockElement::Instruction(i) => {
                i.pc = pc;
                pc += i.size as u32;
            }
        }
    }
    let expected_block = InstructionBlock::with_instructions(0xdead0000, expected_instructions);
    assert_eq!(block, expected_block);

    // -------------------------------------------------------------------------------------------
    // Encoding
    // -------------------------------------------------------------------------------------------

    // The new address where we want to re-encode our block.
    block.pc = 0xdeed0000;
    let encoder_block = EncoderBlock::new(vec![block.clone()]);
    let code_blocks = encoder_block.encode()?;

    // The expected output: the original instructions re-encoded for address 0xdeed0000.
    // Only the branch to 0xdead0d0e changes (`0x00, 0xf4, 0x7c, 0xbe`): its target lies outside
    // the block, so its PC-relative offset depends on the block address.
    let expected_code = [
        0x4d, 0xf8, 0x04, 0xec, 0x03, 0x68, 0x0c, 0x68, 0xa3, 0x42, 0x0e, 0xbf, 0xaf, 0xf2, 0x10,
        0x05, 0x00, 0x25, 0x00, 0xf4, 0x7c, 0xbe, 0x5d, 0xf8, 0x04, 0xfc,
    ];
    assert_eq!(expected_code, code_blocks[0].data[..]);

    // -------------------------------------------------------------------------------------------
    // Formatting
    // -------------------------------------------------------------------------------------------

    let mut output = String::new();
    let mut formatter = Fmt {};
    let mut config = ConfigLLVM::new();

    // Settings can be applied globally and per-instruction.
    // Here we change the output of the branch and `adr` instructions to format label names as
    // hexadecimal numbers instead of decimal ones.
    config.instructions.b_t4.syntax.positive_integer_format =
        Some(FormatInteger::HexadecimalUnsigned);
    config.instructions.b_t4.syntax.negative_integer_format =
        Some(FormatInteger::HexadecimalUnsigned);
    config.instructions.adr_t2.syntax.positive_integer_format =
        Some(FormatInteger::HexadecimalUnsigned);
    config.instructions.adr_t2.syntax.negative_integer_format =
        Some(FormatInteger::HexadecimalUnsigned);

    // We format each instruction and terminate it with a newline; labels are not printed.
    for element in block.instructions {
        match element {
            InstructionBlockElement::Label(_) => {}
            InstructionBlockElement::Instruction(i) => {
                formatter.format(&mut output, &config, &i)?;
                output.push('\n');
            }
        }
    }

    // The expected output.
    assert_eq!(
        output,
        "str lr, [sp, #-4]\n\
        ldr r3, [r0]\n\
        ldr r4, [r1]\n\
        cmp r3, r4\n\
        itee eq\n\
        adreq.w r5, #0xdead0000\n\
        movne r5, #0\n\
        bne.w #0xdead0d0e\n\
        ldr pc, [sp, #-4]\n"
    );

    Ok(())
}

To-Do

  • Encoder/Decoder
    • Lexer
    • System registers
    • ARM features
    • ARM pseudocode parser (extract information from decode/operation pseudocode)
    • Remaining instruction types (system, simd/fp, sve, sme, etc.)
    • Errors
    • Serde
  • Generator
    • Code refactor
  • Tests
    • Test against additional disassemblers
    • Fuzzing
    • Benchmark
    • CI
  • Misc
    • Documentation