Skip to main content

ras/encoder/
x86_64.rs

1//! x86-64 binary instruction encoder
2
3use crate::encoder::traits::{InstructionEncoder, ParsedInstruction};
4use crate::error::RasError;
5
/// Encoder that turns parsed x86-64 instructions into machine-code bytes.
#[derive(Debug)]
pub struct X86_64Encoder {
    /// Running byte counter kept by the encoder; starts at zero.
    position: usize,
}
9
10impl Default for X86_64Encoder {
11    fn default() -> Self {
12        Self::new()
13    }
14}
15
16impl X86_64Encoder {
17    pub fn new() -> Self {
18        Self { position: 0 }
19    }
20
21    fn encode_rex(&self, w: bool, r: u8, x: u8, b: u8) -> u8 {
22        let mut rex = 0x40;
23        if w {
24            rex |= 0x08;
25        }
26        if r > 0 {
27            rex |= 0x04;
28        }
29        if x > 0 {
30            rex |= 0x02;
31        }
32        if b > 0 {
33            rex |= 0x01;
34        }
35        rex
36    }
37
38    fn parse_register(&self, reg: &str) -> Result<u8, RasError> {
39        let reg = reg.trim_start_matches('%');
40        match reg {
41            "rax" | "eax" | "ax" | "al" => Ok(0),
42            "rcx" | "ecx" | "cx" | "cl" => Ok(1),
43            "rdx" | "edx" | "dx" | "dl" => Ok(2),
44            "rbx" | "ebx" | "bx" | "bl" => Ok(3),
45            "rsp" | "esp" | "sp" | "ah" => Ok(4),
46            "rbp" | "ebp" | "bp" | "ch" => Ok(5),
47            "rsi" | "esi" | "si" | "dh" => Ok(6),
48            "rdi" | "edi" | "di" | "bh" => Ok(7),
49            "r8" => Ok(8),
50            "r9" => Ok(9),
51            "r10" => Ok(10),
52            "r11" => Ok(11),
53            "r12" => Ok(12),
54            "r13" => Ok(13),
55            "r14" => Ok(14),
56            "r15" => Ok(15),
57            _ => Err(RasError::EncodingError(format!(
58                "Unknown register: {}",
59                reg
60            ))),
61        }
62    }
63
64    fn parse_memory(&self, op: &str) -> Result<(i32, u8), RasError> {
65        let op = op.trim();
66        if !op.contains('(') || !op.contains(')') {
67            return Err(RasError::EncodingError(format!(
68                "Invalid memory operand: {}",
69                op
70            )));
71        }
72        let paren_start = op.find('(').unwrap();
73        let paren_end = op.find(')').unwrap();
74        let disp_str = op[..paren_start].trim();
75        let base_str = op[paren_start + 1..paren_end].trim();
76
77        let disp: i32 = if disp_str.is_empty() {
78            0
79        } else {
80            disp_str.parse().map_err(|_| {
81                RasError::EncodingError(format!("Invalid displacement: {}", disp_str))
82            })?
83        };
84
85        let base = self.parse_register(base_str)?;
86        Ok((disp, base))
87    }
88
89    fn is_memory(&self, op: &str) -> bool {
90        op.contains('(') && op.contains(')')
91    }
92
93    fn encode_mov_mem_reg(&self, mem: &str, reg: u8, store: bool) -> Result<Vec<u8>, RasError> {
94        let (disp, base) = self.parse_memory(mem)?;
95        let rex = self.encode_rex(true, reg >> 3, 0, base >> 3);
96        let mod_reg = (reg & 7) << 3;
97
98        let (mod_bits, r_m, sib_opt, disp_bytes): (u8, u8, Option<u8>, Vec<u8>) = if base == 4 {
99            (0, 4, Some(0x24), vec![])
100        } else if base >= 8 {
101            let sib = (2 << 6) | (4 << 3) | (base & 7);
102            if disp == 0 {
103                (0, 4, Some(sib), vec![])
104            } else if (-128..=127).contains(&disp) {
105                (1, 4, Some(sib), vec![disp as u8])
106            } else {
107                (2, 4, Some(sib), disp.to_le_bytes().to_vec())
108            }
109        } else if disp == 0 && base != 5 {
110            (0, base & 7, None, vec![])
111        } else if base == 5 && disp == 0 {
112            (1, 5, None, vec![0])
113        } else if (-128..=127).contains(&disp) {
114            (1, base & 7, None, vec![disp as u8])
115        } else {
116            (2, base & 7, None, disp.to_le_bytes().to_vec())
117        };
118
119        let modrm = (mod_bits << 6) | mod_reg | r_m;
120        let mut code = vec![rex];
121        code.push(if store { 0x89 } else { 0x8B });
122        code.push(modrm);
123        if let Some(sib) = sib_opt {
124            code.push(sib);
125        }
126        code.extend(disp_bytes);
127        Ok(code)
128    }
129}
130
131impl InstructionEncoder for X86_64Encoder {
132    fn encode_instruction(&mut self, inst: &ParsedInstruction) -> Result<Vec<u8>, RasError> {
133        let opcode = inst.opcode.to_lowercase();
134        let mut code = Vec::new();
135
136        match opcode.as_str() {
137            "movq" | "mov" => {
138                if inst.operands.len() != 2 {
139                    return Err(RasError::EncodingError(
140                        "mov requires 2 operands".to_string(),
141                    ));
142                }
143                let a = &inst.operands[0];
144                let b = &inst.operands[1];
145
146                if a.starts_with('$') && !b.starts_with('$') {
147                    let imm: i64 = a
148                        .trim_start_matches('$')
149                        .parse()
150                        .map_err(|_| RasError::EncodingError("Invalid immediate".to_string()))?;
151                    let dst_reg = self.parse_register(b)?;
152                    code.push(self.encode_rex(true, 0, 0, dst_reg >> 3));
153                    code.push(0xB8 | (dst_reg & 7));
154                    code.extend_from_slice(&imm.to_le_bytes());
155                } else if !a.starts_with('$') && b.starts_with('$') {
156                    let imm: i64 = b
157                        .trim_start_matches('$')
158                        .parse()
159                        .map_err(|_| RasError::EncodingError("Invalid immediate".to_string()))?;
160                    let dst_reg = self.parse_register(a)?;
161                    code.push(self.encode_rex(true, 0, 0, dst_reg >> 3));
162                    code.push(0xB8 | (dst_reg & 7));
163                    code.extend_from_slice(&imm.to_le_bytes());
164                } else if self.is_memory(a) && !self.is_memory(b) {
165                    let reg = self.parse_register(b)?;
166                    code.extend(self.encode_mov_mem_reg(a, reg, false)?);
167                } else if self.is_memory(b) && !self.is_memory(a) {
168                    let reg = self.parse_register(a)?;
169                    code.extend(self.encode_mov_mem_reg(b, reg, true)?);
170                } else if !a.starts_with('$')
171                    && !b.starts_with('$')
172                    && !self.is_memory(a)
173                    && !self.is_memory(b)
174                {
175                    let a_xmm = parse_xmm(a.trim().trim_start_matches('%'));
176                    let b_xmm = parse_xmm(b.trim().trim_start_matches('%'));
177                    if let (Ok(gpr), Some(xmm)) = (self.parse_register(a), b_xmm) {
178                        // movq %gpr, %xmm  →  66 REX.W 0F 6E /r
179                        code.push(0x66);
180                        code.push(0x48 | ((xmm >> 3) << 2) | (gpr >> 3));
181                        code.extend_from_slice(&[0x0F, 0x6E]);
182                        code.push(0xC0 | ((xmm & 7) << 3) | (gpr & 7));
183                    } else if let (Some(xmm), Ok(gpr)) = (a_xmm, self.parse_register(b)) {
184                        // movq %xmm, %gpr  →  66 REX.W 0F 7E /r
185                        code.push(0x66);
186                        code.push(0x48 | ((xmm >> 3) << 2) | (gpr >> 3));
187                        code.extend_from_slice(&[0x0F, 0x7E]);
188                        code.push(0xC0 | ((xmm & 7) << 3) | (gpr & 7));
189                    } else {
190                        // AT&T order: `mov src, dst`. 0x89 is MOV r/m64, r64 where
191                        // the reg field is the source and r/m is the destination.
192                        let src_reg = self.parse_register(a)?;
193                        let dst_reg = self.parse_register(b)?;
194                        code.push(self.encode_rex(true, src_reg >> 3, 0, dst_reg >> 3));
195                        code.push(0x89);
196                        code.push(0xC0 | ((src_reg & 7) << 3) | (dst_reg & 7));
197                    }
198                } else {
199                    return Err(RasError::EncodingError(
200                        "mov: one operand must be a register".to_string(),
201                    ));
202                }
203            }
204            "ret" => code.push(0xC3),
205            "addq" | "add" => {
206                if inst.operands.len() != 2 {
207                    return Err(RasError::EncodingError(
208                        "add requires 2 operands".to_string(),
209                    ));
210                }
211                let a = &inst.operands[0];
212                let b = &inst.operands[1];
213
214                let (dst_reg, imm_opt) =
215                    if a.starts_with('$') {
216                        let imm: i32 = a.trim_start_matches('$').parse().map_err(|_| {
217                            RasError::EncodingError("Invalid immediate".to_string())
218                        })?;
219                        (self.parse_register(b)?, Some(imm))
220                    } else if b.starts_with('$') {
221                        let imm: i32 = b.trim_start_matches('$').parse().map_err(|_| {
222                            RasError::EncodingError("Invalid immediate".to_string())
223                        })?;
224                        (self.parse_register(a)?, Some(imm))
225                    } else {
226                        // AT&T order: `add src, dst`. 0x01 is ADD r/m64, r64
227                        // (reg = source added into r/m = destination).
228                        let src_reg = self.parse_register(a)?;
229                        let dst_reg = self.parse_register(b)?;
230                        code.push(self.encode_rex(true, src_reg >> 3, 0, dst_reg >> 3));
231                        code.push(0x01);
232                        code.push(0xC0 | ((src_reg & 7) << 3) | (dst_reg & 7));
233                        self.position += code.len();
234                        return Ok(code);
235                    };
236
237                if let Some(imm) = imm_opt {
238                    if (-128..=127).contains(&imm) {
239                        code.push(self.encode_rex(true, 0, 0, dst_reg >> 3));
240                        code.push(0x83);
241                        code.push(0xC0 | (dst_reg & 7));
242                        code.push(imm as u8);
243                    } else {
244                        code.push(self.encode_rex(true, 0, 0, dst_reg >> 3));
245                        code.push(0x81);
246                        code.push(0xC0 | (dst_reg & 7));
247                        code.extend_from_slice(&imm.to_le_bytes());
248                    }
249                }
250            }
251            "pushq" | "push" => {
252                if inst.operands.len() != 1 {
253                    return Err(RasError::EncodingError(
254                        "push requires 1 operand".to_string(),
255                    ));
256                }
257                let reg = self.parse_register(&inst.operands[0])?;
258                if reg >= 8 {
259                    code.push(0x41);
260                }
261                code.push(0x50 | (reg & 7));
262            }
263            "popq" | "pop" => {
264                if inst.operands.len() != 1 {
265                    return Err(RasError::EncodingError(
266                        "pop requires 1 operand".to_string(),
267                    ));
268                }
269                let reg = self.parse_register(&inst.operands[0])?;
270                if reg >= 8 {
271                    code.push(0x41);
272                }
273                code.push(0x58 | (reg & 7));
274            }
275            "leaq" | "lea" => {
276                if inst.operands.len() != 2 {
277                    return Err(RasError::EncodingError(
278                        "lea requires 2 operands".to_string(),
279                    ));
280                }
281                let a = &inst.operands[0];
282                let b = &inst.operands[1];
283                if !self.is_memory(a) || self.is_memory(b) {
284                    return Err(RasError::EncodingError(
285                        "lea: first operand must be memory, second register".to_string(),
286                    ));
287                }
288                let reg = self.parse_register(b)?;
289                let bytes = self.encode_mov_mem_reg(a, reg, false)?;
290                let mut lea_code = vec![bytes[0]];
291                lea_code.push(0x8D);
292                lea_code.extend_from_slice(&bytes[2..]);
293                code.extend(lea_code);
294            }
295            "imulq" | "imul" => {
296                if inst.operands.len() != 2 {
297                    return Err(RasError::EncodingError(
298                        "imul requires 2 operands".to_string(),
299                    ));
300                }
301                let a = &inst.operands[0];
302                let b = &inst.operands[1];
303                if self.is_memory(a) || self.is_memory(b) {
304                    return Err(RasError::EncodingError(
305                        "imul reg,reg only supported".to_string(),
306                    ));
307                }
308                // AT&T order: `imul src, dst`. 0F AF is IMUL r64, r/m64 where
309                // the reg field is the destination and r/m is the source.
310                let src_reg = self.parse_register(a)?;
311                let dst_reg = self.parse_register(b)?;
312                code.push(self.encode_rex(true, dst_reg >> 3, 0, src_reg >> 3));
313                code.extend_from_slice(&[0x0F, 0xAF]);
314                code.push(0xC0 | ((dst_reg & 7) << 3) | (src_reg & 7));
315            }
316            "subq" | "sub" => {
317                if inst.operands.len() != 2 {
318                    return Err(RasError::EncodingError(
319                        "sub requires 2 operands".to_string(),
320                    ));
321                }
322                let a = &inst.operands[0];
323                let b = &inst.operands[1];
324                let (dst_reg, imm_opt) =
325                    if a.starts_with('$') {
326                        let imm: i32 = a.trim_start_matches('$').parse().map_err(|_| {
327                            RasError::EncodingError("Invalid immediate".to_string())
328                        })?;
329                        (self.parse_register(b)?, Some(imm))
330                    } else if b.starts_with('$') {
331                        let imm: i32 = b.trim_start_matches('$').parse().map_err(|_| {
332                            RasError::EncodingError("Invalid immediate".to_string())
333                        })?;
334                        (self.parse_register(a)?, Some(imm))
335                    } else {
336                        // AT&T order: `sub src, dst`. 0x29 is SUB r/m64, r64
337                        // (r/m = destination, reg = subtracted source).
338                        let src_reg = self.parse_register(a)?;
339                        let dst_reg = self.parse_register(b)?;
340                        code.push(self.encode_rex(true, src_reg >> 3, 0, dst_reg >> 3));
341                        code.push(0x29);
342                        code.push(0xC0 | ((src_reg & 7) << 3) | (dst_reg & 7));
343                        self.position += code.len();
344                        return Ok(code);
345                    };
346                if let Some(imm) = imm_opt {
347                    if (-128..=127).contains(&imm) {
348                        code.push(self.encode_rex(true, 0, 0, dst_reg >> 3));
349                        code.push(0x83);
350                        code.push(0xE8 | (dst_reg & 7));
351                        code.push(imm as u8);
352                    } else {
353                        code.push(self.encode_rex(true, 0, 0, dst_reg >> 3));
354                        code.push(0x81);
355                        code.push(0xE8 | (dst_reg & 7));
356                        code.extend_from_slice(&imm.to_le_bytes());
357                    }
358                }
359            }
360            // REX.W 0x99: sign-extend rax into rdx:rax (needed before idivq)
361            "cqto" | "cqo" => {
362                code.push(0x48); // REX.W
363                code.push(0x99);
364            }
            // 0x99 without REX.W: 32-bit sign-extend of eax into edx:eax
366            "cdq" => {
367                code.push(0x99);
368            }
369            // idivq %reg: REX.W F7 /7
370            "idivq" | "idiv" => {
371                if inst.operands.len() != 1 {
372                    return Err(RasError::EncodingError(
373                        "idivq requires 1 operand".to_string(),
374                    ));
375                }
376                let reg = self.parse_register(&inst.operands[0])?;
377                code.push(self.encode_rex(true, 0, 0, reg >> 3));
378                code.push(0xF7);
379                code.push(0xF8 | (reg & 7)); // ModRM: /7 = 111, mod=11
380            }
381            // divq %reg: REX.W F7 /6 (unsigned divide)
382            "divq" | "div" => {
383                if inst.operands.len() != 1 {
384                    return Err(RasError::EncodingError(
385                        "divq requires 1 operand".to_string(),
386                    ));
387                }
388                let reg = self.parse_register(&inst.operands[0])?;
389                code.push(self.encode_rex(true, 0, 0, reg >> 3));
390                code.push(0xF7);
391                code.push(0xF0 | (reg & 7)); // ModRM: /6 = 110, mod=11
392            }
393            // xorq reg, reg
394            "xorq" | "xor" => {
395                if inst.operands.len() != 2 {
396                    return Err(RasError::EncodingError(
397                        "xorq requires 2 operands".to_string(),
398                    ));
399                }
400                // AT&T order: `xor src, dst`. 0x31 is XOR r/m64, r64
401                // (r/m = destination, reg = source).
402                let src = self.parse_register(&inst.operands[0])?;
403                let dst = self.parse_register(&inst.operands[1])?;
404                code.push(self.encode_rex(true, src >> 3, 0, dst >> 3));
405                code.push(0x31);
406                code.push(0xC0 | ((src & 7) << 3) | (dst & 7));
407            }
408            // andq reg/imm, reg
409            "andq" | "and" => {
410                if inst.operands.len() != 2 {
411                    return Err(RasError::EncodingError(
412                        "andq requires 2 operands".to_string(),
413                    ));
414                }
415                let a = &inst.operands[0];
416                let b = &inst.operands[1];
417                if a.starts_with('$') {
418                    let imm: i32 = a.trim_start_matches('$').parse().map_err(|_| {
419                        RasError::EncodingError("Invalid immediate for andq".to_string())
420                    })?;
421                    let dst = self.parse_register(b)?;
422                    if (-128..=127).contains(&imm) {
423                        code.push(self.encode_rex(true, 0, 0, dst >> 3));
424                        code.push(0x83);
425                        code.push(0xE0 | (dst & 7)); // /4
426                        code.push(imm as u8);
427                    } else {
428                        code.push(self.encode_rex(true, 0, 0, dst >> 3));
429                        code.push(0x81);
430                        code.push(0xE0 | (dst & 7));
431                        code.extend_from_slice(&imm.to_le_bytes());
432                    }
433                } else {
434                    // AT&T order: `and src, dst`. 0x21 is AND r/m64, r64
435                    // (r/m = destination, reg = source).
436                    let src = self.parse_register(a)?;
437                    let dst = self.parse_register(b)?;
438                    code.push(self.encode_rex(true, src >> 3, 0, dst >> 3));
439                    code.push(0x21);
440                    code.push(0xC0 | ((src & 7) << 3) | (dst & 7));
441                }
442            }
443            // orq reg/imm, reg
444            "orq" | "or" => {
445                if inst.operands.len() != 2 {
446                    return Err(RasError::EncodingError(
447                        "orq requires 2 operands".to_string(),
448                    ));
449                }
450                let a = &inst.operands[0];
451                let b = &inst.operands[1];
452                if a.starts_with('$') {
453                    let imm: i32 = a.trim_start_matches('$').parse().map_err(|_| {
454                        RasError::EncodingError("Invalid immediate for orq".to_string())
455                    })?;
456                    let dst = self.parse_register(b)?;
457                    if (-128..=127).contains(&imm) {
458                        code.push(self.encode_rex(true, 0, 0, dst >> 3));
459                        code.push(0x83);
460                        code.push(0xC8 | (dst & 7)); // /1
461                        code.push(imm as u8);
462                    } else {
463                        code.push(self.encode_rex(true, 0, 0, dst >> 3));
464                        code.push(0x81);
465                        code.push(0xC8 | (dst & 7));
466                        code.extend_from_slice(&imm.to_le_bytes());
467                    }
468                } else {
469                    // AT&T order: `or src, dst`. 0x09 is OR r/m64, r64
470                    // (r/m = destination, reg = source).
471                    let src = self.parse_register(a)?;
472                    let dst = self.parse_register(b)?;
473                    code.push(self.encode_rex(true, src >> 3, 0, dst >> 3));
474                    code.push(0x09);
475                    code.push(0xC0 | ((src & 7) << 3) | (dst & 7));
476                }
477            }
478            // sarq / salq / shrq / shlq (shift instructions)
479            "sarq" | "sar" => {
480                if inst.operands.len() != 2 {
481                    return Err(RasError::EncodingError(
482                        "sarq requires 2 operands".to_string(),
483                    ));
484                }
485                let a = &inst.operands[0];
486                let dst = self.parse_register(&inst.operands[1])?;
487                if a.starts_with('$') {
488                    let imm: u8 = a.trim_start_matches('$').parse().map_err(|_| {
489                        RasError::EncodingError("Invalid immediate for sarq".to_string())
490                    })?;
491                    code.push(self.encode_rex(true, 0, 0, dst >> 3));
492                    code.push(0xC1);
493                    code.push(0xF8 | (dst & 7)); // /7
494                    code.push(imm);
495                } else if a == "%cl" || a == "cl" {
496                    code.push(self.encode_rex(true, 0, 0, dst >> 3));
497                    code.push(0xD3);
498                    code.push(0xF8 | (dst & 7));
499                } else {
500                    return Err(RasError::EncodingError(
501                        "sarq: unsupported shift source".to_string(),
502                    ));
503                }
504            }
505            "shlq" | "salq" | "shl" | "sal" => {
506                if inst.operands.len() != 2 {
507                    return Err(RasError::EncodingError(
508                        "shlq requires 2 operands".to_string(),
509                    ));
510                }
511                let a = &inst.operands[0];
512                let dst = self.parse_register(&inst.operands[1])?;
513                if a.starts_with('$') {
514                    let imm: u8 = a.trim_start_matches('$').parse().map_err(|_| {
515                        RasError::EncodingError("Invalid immediate for shlq".to_string())
516                    })?;
517                    code.push(self.encode_rex(true, 0, 0, dst >> 3));
518                    code.push(0xC1);
519                    code.push(0xE0 | (dst & 7)); // /4
520                    code.push(imm);
521                } else if a == "%cl" || a == "cl" {
522                    code.push(self.encode_rex(true, 0, 0, dst >> 3));
523                    code.push(0xD3);
524                    code.push(0xE0 | (dst & 7));
525                } else {
526                    return Err(RasError::EncodingError(
527                        "shlq: unsupported shift source".to_string(),
528                    ));
529                }
530            }
531            "shrq" | "shr" => {
532                if inst.operands.len() != 2 {
533                    return Err(RasError::EncodingError(
534                        "shrq requires 2 operands".to_string(),
535                    ));
536                }
537                let a = &inst.operands[0];
538                let dst = self.parse_register(&inst.operands[1])?;
539                if a.starts_with('$') {
540                    let imm: u8 = a.trim_start_matches('$').parse().map_err(|_| {
541                        RasError::EncodingError("Invalid immediate for shrq".to_string())
542                    })?;
543                    code.push(self.encode_rex(true, 0, 0, dst >> 3));
544                    code.push(0xC1);
545                    code.push(0xE8 | (dst & 7)); // /5
546                    code.push(imm);
547                } else if a == "%cl" || a == "cl" {
548                    code.push(self.encode_rex(true, 0, 0, dst >> 3));
549                    code.push(0xD3);
550                    code.push(0xE8 | (dst & 7));
551                } else {
552                    return Err(RasError::EncodingError(
553                        "shrq: unsupported shift source".to_string(),
554                    ));
555                }
556            }
557            // cmpq: compare two values (sets flags)
558            "cmpq" | "cmp" => {
559                if inst.operands.len() != 2 {
560                    return Err(RasError::EncodingError(
561                        "cmpq requires 2 operands".to_string(),
562                    ));
563                }
564                let a = &inst.operands[0];
565                let b = &inst.operands[1];
566                let dst = self.parse_register(b)?;
567                if a.starts_with('$') {
568                    let imm: i32 = a.trim_start_matches('$').parse().map_err(|_| {
569                        RasError::EncodingError("Invalid immediate for cmpq".to_string())
570                    })?;
571                    if (-128..=127).contains(&imm) {
572                        code.push(self.encode_rex(true, 0, 0, dst >> 3));
573                        code.push(0x83);
574                        code.push(0xF8 | (dst & 7)); // /7
575                        code.push(imm as u8);
576                    } else {
577                        code.push(self.encode_rex(true, 0, 0, dst >> 3));
578                        code.push(0x81);
579                        code.push(0xF8 | (dst & 7));
580                        code.extend_from_slice(&imm.to_le_bytes());
581                    }
582                } else {
583                    // AT&T order: `cmp src, dst` computes dst - src. 0x3B is
584                    // CMP r64, r/m64 (reg = dst, r/m = src).
585                    let src = self.parse_register(a)?;
586                    code.push(self.encode_rex(true, dst >> 3, 0, src >> 3));
587                    code.push(0x3B);
588                    code.push(0xC0 | ((dst & 7) << 3) | (src & 7));
589                }
590            }
591            // testq: bitwise AND without storing result (sets flags)
592            "testq" | "test" => {
593                if inst.operands.len() != 2 {
594                    return Err(RasError::EncodingError(
595                        "testq requires 2 operands".to_string(),
596                    ));
597                }
598                let a = &inst.operands[0];
599                let b = &inst.operands[1];
600                if a.starts_with('$') {
601                    let imm: i32 = a.trim_start_matches('$').parse().map_err(|_| {
602                        RasError::EncodingError("Invalid immediate for testq".to_string())
603                    })?;
604                    let dst = self.parse_register(b)?;
605                    code.push(self.encode_rex(true, 0, 0, dst >> 3));
606                    code.push(0xF7);
607                    code.push(0xC0 | (dst & 7)); // /0
608                    code.extend_from_slice(&imm.to_le_bytes());
609                } else {
610                    let dst = self.parse_register(a)?;
611                    let src = self.parse_register(b)?;
612                    code.push(self.encode_rex(true, src >> 3, 0, dst >> 3));
613                    code.push(0x85);
614                    code.push(0xC0 | ((src & 7) << 3) | (dst & 7));
615                }
616            }
617            // SETcc instructions (byte result from flags)
618            "sete" | "setz" => {
619                if inst.operands.len() != 1 {
620                    return Err(RasError::EncodingError(
621                        "sete requires 1 operand".to_string(),
622                    ));
623                }
624                let reg = self.parse_register(&inst.operands[0])?;
625                if reg >= 4 {
626                    code.push(0x40 | (reg >> 3));
627                } // REX prefix for spl/bpl/sil/dil and r8b-r15b
628                code.push(0x0F);
629                code.push(0x94);
630                code.push(0xC0 | (reg & 7));
631            }
632            "setne" | "setnz" => {
633                if inst.operands.len() != 1 {
634                    return Err(RasError::EncodingError(
635                        "setne requires 1 operand".to_string(),
636                    ));
637                }
638                let reg = self.parse_register(&inst.operands[0])?;
639                if reg >= 4 {
640                    code.push(0x40 | (reg >> 3));
641                }
642                code.push(0x0F);
643                code.push(0x95);
644                code.push(0xC0 | (reg & 7));
645            }
646            "setl" | "setnge" => {
647                if inst.operands.len() != 1 {
648                    return Err(RasError::EncodingError(
649                        "setl requires 1 operand".to_string(),
650                    ));
651                }
652                let reg = self.parse_register(&inst.operands[0])?;
653                if reg >= 4 {
654                    code.push(0x40 | (reg >> 3));
655                }
656                code.push(0x0F);
657                code.push(0x9C);
658                code.push(0xC0 | (reg & 7));
659            }
660            "setle" | "setng" => {
661                if inst.operands.len() != 1 {
662                    return Err(RasError::EncodingError(
663                        "setle requires 1 operand".to_string(),
664                    ));
665                }
666                let reg = self.parse_register(&inst.operands[0])?;
667                if reg >= 4 {
668                    code.push(0x40 | (reg >> 3));
669                }
670                code.push(0x0F);
671                code.push(0x9E);
672                code.push(0xC0 | (reg & 7));
673            }
674            "setg" | "setnle" => {
675                if inst.operands.len() != 1 {
676                    return Err(RasError::EncodingError(
677                        "setg requires 1 operand".to_string(),
678                    ));
679                }
680                let reg = self.parse_register(&inst.operands[0])?;
681                if reg >= 4 {
682                    code.push(0x40 | (reg >> 3));
683                }
684                code.push(0x0F);
685                code.push(0x9F);
686                code.push(0xC0 | (reg & 7));
687            }
688            "setge" | "setnl" => {
689                if inst.operands.len() != 1 {
690                    return Err(RasError::EncodingError(
691                        "setge requires 1 operand".to_string(),
692                    ));
693                }
694                let reg = self.parse_register(&inst.operands[0])?;
695                if reg >= 4 {
696                    code.push(0x40 | (reg >> 3));
697                }
698                code.push(0x0F);
699                code.push(0x9D);
700                code.push(0xC0 | (reg & 7));
701            }
702            // xorl: 32-bit XOR (no REX.W; zeroing a reg also zero-extends to 64-bit)
703            "xorl" => {
704                if inst.operands.len() != 2 {
705                    return Err(RasError::EncodingError("xorl requires 2 operands".into()));
706                }
707                let src = self.parse_register(&inst.operands[0])?;
708                let dst = self.parse_register(&inst.operands[1])?;
709                // Only REX prefix if accessing r8-r15
710                if src >= 8 || dst >= 8 {
711                    code.push(0x40 | ((src >> 3) << 2) | (dst >> 3));
712                }
713                code.push(0x31);
714                code.push(0xC0 | ((src & 7) << 3) | (dst & 7));
715            }
716            // movb: byte store to memory or byte reg move
717            "movb" => {
718                if inst.operands.len() != 2 {
719                    return Err(RasError::EncodingError("movb requires 2 operands".into()));
720                }
721                let src_str = &inst.operands[0];
722                let dst_str = &inst.operands[1];
723                if self.is_memory(dst_str) {
724                    // movb %reg8, (mem)  — 88 /r
725                    let (disp, base) = self.parse_memory(dst_str)?;
726                    let src = self.parse_register(src_str)?;
727                    // REX prefix if needed for high-numbered regs
728                    if src >= 4 || base >= 8 {
729                        code.push(0x40 | ((src >> 3) << 2) | (base >> 3));
730                    }
731                    code.push(0x88);
732                    if base == 4 {
733                        code.push(((src & 7) << 3) | 4);
734                        code.push(0x24); // SIB: index=none, base=rsp
735                    } else if disp == 0 && base != 5 {
736                        code.push(((src & 7) << 3) | (base & 7));
737                    } else if (-128..=127).contains(&disp) {
738                        code.push(0x40 | ((src & 7) << 3) | (base & 7));
739                        code.push(disp as u8);
740                    } else {
741                        code.push(0x80 | ((src & 7) << 3) | (base & 7));
742                        code.extend_from_slice(&disp.to_le_bytes());
743                    }
744                } else if src_str.starts_with('$') {
745                    // movb $imm8, (mem)
746                    let imm: i8 = src_str.trim_start_matches('$').parse().map_err(|_| {
747                        RasError::EncodingError("Invalid immediate for movb".into())
748                    })?;
749                    let (disp, base) = self.parse_memory(dst_str)?;
750                    if base >= 8 { code.push(0x41); }
751                    code.push(0xC6);
752                    if disp == 0 && base != 5 {
753                        code.push(base & 7);
754                    } else if (-128..=127).contains(&disp) {
755                        code.push(0x40 | (base & 7));
756                        code.push(disp as u8);
757                    } else {
758                        code.push(0x80 | (base & 7));
759                        code.extend_from_slice(&disp.to_le_bytes());
760                    }
761                    code.push(imm as u8);
762                } else {
763                    return Err(RasError::EncodingError("movb: unsupported operand form".into()));
764                }
765            }
766            // syscall: 0F 05
767            "syscall" => {
768                code.extend_from_slice(&[0x0F, 0x05]);
769            }
770            // SETcc for unsigned comparisons
771            "seta" | "setnbe" => {
772                let reg = self.parse_register(&inst.operands[0])?;
773                if reg >= 4 { code.push(0x40 | (reg >> 3)); }
774                code.extend_from_slice(&[0x0F, 0x97, 0xC0 | (reg & 7)]);
775            }
776            "setae" | "setnb" | "setnc" => {
777                let reg = self.parse_register(&inst.operands[0])?;
778                if reg >= 4 { code.push(0x40 | (reg >> 3)); }
779                code.extend_from_slice(&[0x0F, 0x93, 0xC0 | (reg & 7)]);
780            }
781            "setb" | "setnae" | "setc" => {
782                let reg = self.parse_register(&inst.operands[0])?;
783                if reg >= 4 { code.push(0x40 | (reg >> 3)); }
784                code.extend_from_slice(&[0x0F, 0x92, 0xC0 | (reg & 7)]);
785            }
786            "setbe" | "setna" => {
787                let reg = self.parse_register(&inst.operands[0])?;
788                if reg >= 4 { code.push(0x40 | (reg >> 3)); }
789                code.extend_from_slice(&[0x0F, 0x96, 0xC0 | (reg & 7)]);
790            }
791            // cmovzq / cmovz (conditional move if ZF=1)
792            "cmovzq" | "cmovz" | "cmoveq" | "cmove" => {
793                if inst.operands.len() != 2 {
794                    return Err(RasError::EncodingError("cmovzq requires 2 operands".into()));
795                }
796                let src = self.parse_register(&inst.operands[0])?;
797                let dst = self.parse_register(&inst.operands[1])?;
798                code.push(self.encode_rex(true, dst >> 3, 0, src >> 3));
799                code.extend_from_slice(&[0x0F, 0x44]);
800                code.push(0xC0 | ((dst & 7) << 3) | (src & 7));
801            }
802            // movd: move 32/64-bit GPR to/from XMM  (66 [REX] 0F 6E /r)
803            "movd" | "movq_xmm" => {
804                if inst.operands.len() != 2 {
805                    return Err(RasError::EncodingError("movd requires 2 operands".into()));
806                }
807                let src_str = inst.operands[0].trim().trim_start_matches('%');
808                let dst_str = inst.operands[1].trim().trim_start_matches('%');
809                // movd %gpr, %xmm  →  66 [REX.W] 0F 6E /r
810                if let (Ok(gpr), Some(xmm)) =
811                    (self.parse_register(inst.operands[0].trim()), parse_xmm(dst_str))
812                {
813                    let need_rex_w = inst.operands[0].trim().trim_start_matches('%').starts_with('r');
814                    code.push(0x66);
815                    if need_rex_w || gpr >= 8 || xmm >= 8 {
816                        code.push(0x40 | (if need_rex_w { 8 } else { 0 }) | ((xmm >> 3) << 2) | (gpr >> 3));
817                    }
818                    code.extend_from_slice(&[0x0F, 0x6E]);
819                    code.push(0xC0 | ((xmm & 7) << 3) | (gpr & 7));
820                } else if let (Some(xmm), Ok(gpr)) =
821                    (parse_xmm(src_str), self.parse_register(inst.operands[1].trim()))
822                {
823                    // movd %xmm, %gpr  →  66 [REX.W] 0F 7E /r
824                    let need_rex_w = inst.operands[1].trim().trim_start_matches('%').starts_with('r');
825                    code.push(0x66);
826                    if need_rex_w || xmm >= 8 || gpr >= 8 {
827                        code.push(0x40 | (if need_rex_w { 8 } else { 0 }) | ((xmm >> 3) << 2) | (gpr >> 3));
828                    }
829                    code.extend_from_slice(&[0x0F, 0x7E]);
830                    code.push(0xC0 | ((xmm & 7) << 3) | (gpr & 7));
831                } else {
832                    return Err(RasError::EncodingError("movd: expected gpr↔xmm operands".into()));
833                }
834            }
835            // xorps: 0F 57 /r
836            "xorps" => {
837                if inst.operands.len() != 2 {
838                    return Err(RasError::EncodingError("xorps requires 2 operands".into()));
839                }
840                let src = parse_xmm(inst.operands[0].trim().trim_start_matches('%'))
841                    .ok_or_else(|| RasError::EncodingError("xorps: expected xmm src".into()))?;
842                let dst = parse_xmm(inst.operands[1].trim().trim_start_matches('%'))
843                    .ok_or_else(|| RasError::EncodingError("xorps: expected xmm dst".into()))?;
844                if src >= 8 || dst >= 8 {
845                    code.push(0x40 | ((dst >> 3) << 2) | (src >> 3));
846                }
847                code.extend_from_slice(&[0x0F, 0x57]);
848                code.push(0xC0 | ((dst & 7) << 3) | (src & 7));
849            }
850            // xorpd: 66 0F 57 /r
851            "xorpd" => {
852                if inst.operands.len() != 2 {
853                    return Err(RasError::EncodingError("xorpd requires 2 operands".into()));
854                }
855                let src = parse_xmm(inst.operands[0].trim().trim_start_matches('%'))
856                    .ok_or_else(|| RasError::EncodingError("xorpd: expected xmm src".into()))?;
857                let dst = parse_xmm(inst.operands[1].trim().trim_start_matches('%'))
858                    .ok_or_else(|| RasError::EncodingError("xorpd: expected xmm dst".into()))?;
859                code.push(0x66);
860                if src >= 8 || dst >= 8 {
861                    code.push(0x40 | ((dst >> 3) << 2) | (src >> 3));
862                }
863                code.extend_from_slice(&[0x0F, 0x57]);
864                code.push(0xC0 | ((dst & 7) << 3) | (src & 7));
865            }
866            // movzbq: zero-extend byte to 64-bit
867            "movzbq" | "movzbl" | "movzx" => {
868                if inst.operands.len() != 2 {
869                    return Err(RasError::EncodingError(
870                        "movzbq requires 2 operands".to_string(),
871                    ));
872                }
873                let src_str = &inst.operands[0];
874                let dst_str = &inst.operands[1];
875                let src = self.parse_register(src_str)?;
876                let dst = self.parse_register(dst_str)?;
877                // REX.W 0F B6 /r
878                code.push(self.encode_rex(true, dst >> 3, 0, src >> 3));
879                code.push(0x0F);
880                code.push(0xB6);
881                code.push(0xC0 | ((dst & 7) << 3) | (src & 7));
882            }
883            _ => {
884                return Err(RasError::EncodingError(format!(
885                    "Unsupported instruction: {}",
886                    opcode
887                )));
888            }
889        }
890
891        self.position += code.len();
892        Ok(code)
893    }
894
895    fn current_position(&self) -> usize {
896        self.position
897    }
898}
899
/// Parses an XMM register name and returns its register number.
///
/// Accepts `xmm0` through `xmm15`, optionally preceded by one or more `%`
/// sigils (AT&T syntax). The numeric suffix must be canonical decimal: no
/// sign and no leading zeros. Surrounding whitespace is not trimmed, so
/// callers are expected to trim the operand first.
///
/// Returns `Some(n)` with `n` in `0..=15` for a valid name, and `None` for
/// anything else (wrong prefix, missing or out-of-range number, or a
/// non-canonical spelling such as `xmm+3` or `xmm007`).
fn parse_xmm(s: &str) -> Option<u8> {
    let suffix = s.trim_start_matches('%').strip_prefix("xmm")?;
    // Match the digits by hand rather than through `str::parse::<u8>`, which
    // would also accept spellings like `+3` or `007` that are not register
    // names.
    match suffix.as_bytes() {
        // xmm0 ..= xmm9
        [d @ b'0'..=b'9'] => Some(*d - b'0'),
        // xmm10 ..= xmm15
        [b'1', d @ b'0'..=b'5'] => Some(10 + (*d - b'0')),
        _ => None,
    }
}