Skip to main content

ras/encoder/
aarch64.rs

1//! AArch64 binary instruction encoder
2
3// The binary literals in this file use field-aligned groupings that mirror the
4// AArch64 instruction encoding spec (e.g. op|S|Rn|imm12|Rd).  Reformatting
5// them into standard 4-bit nibble groups would destroy that readability.
6#![allow(clippy::unusual_byte_groupings)]
7// Several field OR expressions have a `| (0 << k)` term for documentation;
8// they are no-ops but make the encoding explicit.
9#![allow(clippy::identity_op)]
10
11use crate::encoder::traits::{InstructionEncoder, ParsedInstruction};
12use crate::error::RasError;
13
/// Encoder that turns parsed AArch64 assembly instructions into machine code.
///
/// `position` is the running total of bytes returned by successful
/// `encode_instruction` calls; it starts at zero and is reported by
/// `current_position`.
#[derive(Debug)]
pub struct AArch64Encoder {
    position: usize,
}
17
18impl Default for AArch64Encoder {
19    fn default() -> Self {
20        Self::new()
21    }
22}
23
24impl AArch64Encoder {
25    pub fn new() -> Self {
26        Self { position: 0 }
27    }
28
29    fn parse_register(&self, reg: &str) -> Result<u8, RasError> {
30        let reg = reg.trim_start_matches('%');
31        match reg {
32            "x0" | "w0" => Ok(0),
33            "x1" | "w1" => Ok(1),
34            "x2" | "w2" => Ok(2),
35            "x3" | "w3" => Ok(3),
36            "x4" | "w4" => Ok(4),
37            "x5" | "w5" => Ok(5),
38            "x6" | "w6" => Ok(6),
39            "x7" | "w7" => Ok(7),
40            "x8" | "w8" => Ok(8),
41            "x9" | "w9" => Ok(9),
42            "x10" | "w10" => Ok(10),
43            "x11" | "w11" => Ok(11),
44            "x12" | "w12" => Ok(12),
45            "x13" | "w13" => Ok(13),
46            "x14" | "w14" => Ok(14),
47            "x15" | "w15" => Ok(15),
48            "x16" | "w16" | "ip0" => Ok(16),
49            "x17" | "w17" | "ip1" => Ok(17),
50            "x18" | "w18" => Ok(18),
51            "x19" | "w19" => Ok(19),
52            "x20" | "w20" => Ok(20),
53            "x21" | "w21" => Ok(21),
54            "x22" | "w22" => Ok(22),
55            "x23" | "w23" => Ok(23),
56            "x24" | "w24" => Ok(24),
57            "x25" | "w25" => Ok(25),
58            "x26" | "w26" => Ok(26),
59            "x27" | "w27" => Ok(27),
60            "x28" | "w28" => Ok(28),
61            "x29" | "w29" | "fp" => Ok(29),
62            "x30" | "w30" | "lr" => Ok(30),
63            "x31" | "w31" | "sp" | "xzr" | "wzr" => Ok(31),
64            _ => Err(RasError::EncodingError(format!(
65                "Unknown register: {}",
66                reg
67            ))),
68        }
69    }
70
71    fn encode_u32(&self, inst: u32) -> Vec<u8> {
72        inst.to_le_bytes().to_vec()
73    }
74
75    fn encode_mov_reg(&self, dst: u8, src: u8) -> Vec<u8> {
76        let inst = 0b1_01_01010_0_0_000000_11111_00000_00000;
77        let inst = inst | (dst as u32);
78        let inst = inst | ((31u32) << 5);
79        let inst = inst | ((src as u32) << 16);
80        self.encode_u32(inst)
81    }
82
83    fn encode_mov_imm64(&self, dst: u8, imm: u64) -> Result<Vec<u8>, RasError> {
84        if imm <= 0xFFFF {
85            let inst = 0b1_100101_10_0000000000000000_00000;
86            let inst = inst | (dst as u32);
87            let inst = inst | ((imm as u32 & 0xFFFF) << 5);
88            Ok(self.encode_u32(inst))
89        } else if (imm & 0xFFFF) == 0 && (imm >> 16) <= 0xFFFF {
90            let inst = 0b1_100101_10_01_0000000000000000_00000;
91            let inst = inst | (dst as u32);
92            let inst = inst | (((imm >> 16) as u32 & 0xFFFF) << 5);
93            Ok(self.encode_u32(inst))
94        } else {
95            Err(RasError::EncodingError(
96                "Large immediate values require MOVZ+MOVK sequence (not yet implemented)"
97                    .to_string(),
98            ))
99        }
100    }
101
102    fn encode_add_reg(&self, dst: u8, src1: u8, src2: u8) -> Vec<u8> {
103        let inst = (0b1u32 << 31)
104            | (0b0u32 << 30)
105            | (0b010110u32 << 24)
106            | (0b00u32 << 22)
107            | (0b000000u32 << 16)
108            | ((src2 as u32) << 10)
109            | ((src1 as u32) << 5)
110            | (dst as u32);
111        self.encode_u32(inst)
112    }
113
114    fn encode_add_imm(&self, dst: u8, src: u8, imm: u32) -> Result<Vec<u8>, RasError> {
115        if imm > 0xFFF {
116            return Err(RasError::EncodingError(
117                "ADD immediate must be 12 bits or less".to_string(),
118            ));
119        }
120        let inst = 0b1_0_0_100010_0_000000000000_00000_00000;
121        let inst = inst | (dst as u32);
122        let inst = inst | ((src as u32) << 5);
123        let inst = inst | ((imm & 0xFFF) << 10);
124        Ok(self.encode_u32(inst))
125    }
126
127    fn encode_str(&self, src: u8, base: u8, offset: i32) -> Result<Vec<u8>, RasError> {
128        crate::aarch64_ldst_imm64::encode_str_imm64(src, base, offset)
129    }
130
131    fn encode_ldr(&self, dst: u8, base: u8, offset: i32) -> Result<Vec<u8>, RasError> {
132        crate::aarch64_ldst_imm64::encode_ldr_imm64(dst, base, offset)
133    }
134
135    fn encode_ret(&self, reg: u8) -> Vec<u8> {
136        self.encode_u32(0xD65F_0000 | ((reg as u32) << 5))
137    }
138}
139
140impl InstructionEncoder for AArch64Encoder {
141    fn encode_instruction(&mut self, inst: &ParsedInstruction) -> Result<Vec<u8>, RasError> {
142        let opcode = inst.opcode.to_lowercase();
143        let mut code = Vec::new();
144
145        match opcode.as_str() {
146            "mov" | "movz" => {
147                if inst.operands.len() != 2 {
148                    return Err(RasError::EncodingError(
149                        "mov requires 2 operands".to_string(),
150                    ));
151                }
152                let dst = &inst.operands[0];
153                let src = &inst.operands[1];
154                let dst_reg = self.parse_register(dst)?;
155
156                if src.starts_with('#') {
157                    let imm: u64 = src
158                        .trim_start_matches('#')
159                        .parse()
160                        .map_err(|_| RasError::EncodingError("Invalid immediate".to_string()))?;
161                    code.extend_from_slice(&self.encode_mov_imm64(dst_reg, imm)?);
162                } else {
163                    let src_reg = self.parse_register(src)?;
164                    code.extend_from_slice(&self.encode_mov_reg(dst_reg, src_reg));
165                }
166            }
167            "ret" => {
168                if inst.operands.is_empty() {
169                    code.extend_from_slice(&self.encode_ret(30));
170                } else {
171                    let reg = self.parse_register(&inst.operands[0])?;
172                    code.extend_from_slice(&self.encode_ret(reg));
173                }
174            }
175            "add" => {
176                if inst.operands.len() != 3 {
177                    return Err(RasError::EncodingError(
178                        "add requires 3 operands".to_string(),
179                    ));
180                }
181                let dst = &inst.operands[0];
182                let src1 = &inst.operands[1];
183                let src2 = &inst.operands[2];
184                let dst_reg = self.parse_register(dst)?;
185                let src1_reg = self.parse_register(src1)?;
186
187                if src2.starts_with('#') {
188                    let imm: u32 = src2
189                        .trim_start_matches('#')
190                        .parse()
191                        .map_err(|_| RasError::EncodingError("Invalid immediate".to_string()))?;
192                    code.extend_from_slice(&self.encode_add_imm(dst_reg, src1_reg, imm)?);
193                } else {
194                    let src2_reg = self.parse_register(src2)?;
195                    code.extend_from_slice(&self.encode_add_reg(dst_reg, src1_reg, src2_reg));
196                }
197            }
198            "str" => {
199                if inst.operands.len() != 2 {
200                    return Err(RasError::EncodingError(
201                        "str requires 2 operands".to_string(),
202                    ));
203                }
204                let src = &inst.operands[0];
205                let mem = &inst.operands[1];
206                let src_reg = self.parse_register(src)?;
207
208                if mem.starts_with('[') && mem.ends_with(']') {
209                    let inner = &mem[1..mem.len() - 1];
210                    let parts: Vec<&str> = inner.split(',').map(|s| s.trim()).collect();
211                    let base_reg = self.parse_register(parts[0])?;
212                    let offset = if parts.len() > 1 {
213                        parts[1]
214                            .trim_start_matches('#')
215                            .parse::<i32>()
216                            .map_err(|_| RasError::EncodingError("Invalid offset".to_string()))?
217                    } else {
218                        0
219                    };
220                    code.extend_from_slice(&self.encode_str(src_reg, base_reg, offset)?);
221                } else {
222                    return Err(RasError::EncodingError(
223                        "str requires memory operand [reg, #offset]".to_string(),
224                    ));
225                }
226            }
227            "ldr" => {
228                if inst.operands.len() != 2 {
229                    return Err(RasError::EncodingError(
230                        "ldr requires 2 operands".to_string(),
231                    ));
232                }
233                let dst = &inst.operands[0];
234                let mem = &inst.operands[1];
235                let dst_reg = self.parse_register(dst)?;
236
237                if mem.starts_with('[') && mem.ends_with(']') {
238                    let inner = &mem[1..mem.len() - 1];
239                    let parts: Vec<&str> = inner.split(',').map(|s| s.trim()).collect();
240                    let base_reg = self.parse_register(parts[0])?;
241                    let offset = if parts.len() > 1 {
242                        parts[1]
243                            .trim_start_matches('#')
244                            .parse::<i32>()
245                            .map_err(|_| RasError::EncodingError("Invalid offset".to_string()))?
246                    } else {
247                        0
248                    };
249                    code.extend_from_slice(&self.encode_ldr(dst_reg, base_reg, offset)?);
250                } else {
251                    return Err(RasError::EncodingError(
252                        "ldr requires memory operand [reg, #offset]".to_string(),
253                    ));
254                }
255            }
256            _ => {
257                return Err(RasError::EncodingError(format!(
258                    "Unsupported instruction: {}",
259                    opcode
260                )));
261            }
262        }
263
264        self.position += code.len();
265        Ok(code)
266    }
267
268    fn current_position(&self) -> usize {
269        self.position
270    }
271}
272
#[cfg(test)]
mod aarch64_ldr_str_encoder_tests {
    use super::AArch64Encoder;
    use crate::encoder::traits::{InstructionEncoder, ParsedInstruction};

    /// Encodes `<opcode> <reg>, [<base>, #<offset>]` with a fresh encoder and
    /// hands back the raw result so callers can check success or failure.
    fn assemble_mem_op(
        opcode: &'static str,
        reg: &str,
        base: &str,
        offset: i32,
    ) -> Result<Vec<u8>, crate::error::RasError> {
        let instruction = ParsedInstruction {
            opcode: opcode.into(),
            operands: vec![reg.into(), format!("[{}, #{}]", base, offset)],
        };
        let mut encoder = AArch64Encoder::new();
        encoder.encode_instruction(&instruction)
    }

    /// `ldr <dst>, [<base>, #<offset>]`, panicking if encoding fails.
    fn assemble_ldr(dst: &str, base: &str, offset: i32) -> Vec<u8> {
        assemble_mem_op("ldr", dst, base, offset).expect("ldr encode")
    }

    /// `str <src>, [<base>, #<offset>]`, panicking if encoding fails.
    fn assemble_str(src: &str, base: &str, offset: i32) -> Vec<u8> {
        assemble_mem_op("str", src, base, offset).expect("str encode")
    }

    #[test]
    fn ldr_scaled_1024_byte_offset_uses_imm12_times_eight() {
        // ldr x0, [x1, #1024]
        let bytes = assemble_ldr("x0", "x1", 1024);
        assert_eq!(bytes, vec![0x20, 0x00, 0x42, 0xF9]);
    }

    #[test]
    fn str_scaled_512_byte_offset_not_ldur_range() {
        // str x2, [x3, #512]
        let bytes = assemble_str("x2", "x3", 512);
        assert_eq!(bytes, vec![0x62, 0x00, 0x01, 0xF9]);
    }

    #[test]
    fn ldr_unscaled_small_positive_uses_ldur() {
        // ldr x4, [x5, #24]
        let bytes = assemble_ldr("x4", "x5", 24);
        assert_eq!(bytes.len(), 4);
        assert_eq!(bytes, vec![0xA4, 0x80, 0x41, 0xF8]);
    }

    #[test]
    fn ldr_unaligned_positive_beyond_ldur_range_errors() {
        // ldr x0, [x1, #260]
        let failure =
            assemble_mem_op("ldr", "x0", "x1", 260).expect_err("260 not multiple of 8 and > 255");
        match failure {
            crate::error::RasError::EncodingError(msg) => {
                assert!(msg.contains("260"), "{}", msg);
            }
            _ => panic!("expected EncodingError"),
        }
    }
}