// asm_rs/optimize.rs
1//! Peephole optimizations for x86/x86-64 instructions.
2//!
3//! These optimizations transform instructions into shorter or more efficient
4//! encodings without changing observable behavior. They are applied when
5//! `OptLevel::Size` is active (the default).
6//!
7//! ## Optimizations
8//!
//! - **Zero-idiom**: `mov reg, 0` → `xor reg, reg` (saves 3–5 bytes, faster on modern CPUs)
//! - **Immediate narrowing**: `mov r64, imm` where imm fits u32 → `mov r32, imm32`
//! - **REX elimination**: `and r64, imm` where imm fits u32 → `and r32, imm32`
//! - **Test conversion**: `and reg, reg` → `test reg, reg` (identical flags, does not write reg)
15
16use crate::ir::*;
17
18/// Apply peephole optimizations to an instruction (mutates in place).
19///
20/// Returns `true` if the instruction was modified.
21///
22/// # Side effects
23///
24/// The **zero-idiom** transform (`mov reg, 0` → `xor reg, reg`) clobbers
25/// FLAGS (sets ZF=1, clears CF/PF/SF/OF). This matches the behaviour of
26/// GAS and NASM at `-O2` and is safe whenever FLAGS are dead after the
27/// instruction, which is the common case in practice.
28pub fn optimize_instruction(instr: &mut Instruction, arch: Arch) -> bool {
29    if !matches!(arch, Arch::X86 | Arch::X86_64) {
30        return false;
31    }
32
33    let mut changed = false;
34
35    // Try each optimization in order of impact
36    changed |= try_zero_idiom(instr);
37    changed |= try_mov_imm32_narrow(instr, arch);
38    changed |= try_rex_elimination(instr, arch);
39    changed |= try_test_conversion(instr);
40
41    changed
42}
43
44/// `mov reg32/64, 0` → `xor reg32, reg32`
45///
46/// This saves 3–5 bytes and is recognized as a zero-idiom by modern CPUs
47/// (no register dependency, no partial-register stall).
48///
49/// Note: This clears flags (ZF=1, CF=PF=0, SF=OF=0). Only safe because
50/// in most contexts the flags are either unused or explicitly set after.
51/// We apply it unconditionally since GAS and NASM both do this at -O2.
52fn try_zero_idiom(instr: &mut Instruction) -> bool {
53    if instr.mnemonic != "mov" {
54        return false;
55    }
56    if instr.operands.len() != 2 {
57        return false;
58    }
59
60    // Only for register destinations
61    let dst_reg = match &instr.operands[0] {
62        Operand::Register(r) => *r,
63        _ => return false,
64    };
65
66    // Source must be immediate 0
67    let is_zero = matches!(&instr.operands[1], Operand::Immediate(0));
68    if !is_zero {
69        return false;
70    }
71
72    let bits = dst_reg.size_bits();
73    // Only 32-bit and 64-bit registers (8/16 have different performance characteristics)
74    if bits != 32 && bits != 64 {
75        return false;
76    }
77
78    // For 64-bit: xor eax, eax zero-extends to rax — use the 32-bit form
79    let xor_reg = if bits == 64 {
80        dst_reg.to_32bit()
81    } else {
82        Some(dst_reg)
83    };
84
85    if let Some(r32) = xor_reg {
86        instr.mnemonic = Mnemonic::from("xor");
87        instr.operands =
88            OperandList::from(alloc::vec![Operand::Register(r32), Operand::Register(r32)]);
89        // Clear any size hint since xor reg, reg doesn't need one
90        instr.size_hint = None;
91        return true;
92    }
93
94    false
95}
96
97/// `mov r64, imm` where imm fits in u32 → `mov r32, imm32`
98///
99/// In 64-bit mode, writing to a 32-bit register zero-extends to 64 bits.
100/// `mov rax, 1` (7 bytes: REX.W + B8 + imm32 or 10 bytes with imm64)
101/// becomes `mov eax, 1` (5 bytes: B8 + imm32).
102fn try_mov_imm32_narrow(instr: &mut Instruction, arch: Arch) -> bool {
103    if arch != Arch::X86_64 {
104        return false;
105    }
106    if instr.mnemonic != "mov" {
107        return false;
108    }
109    if instr.operands.len() != 2 {
110        return false;
111    }
112
113    let dst_reg = match &instr.operands[0] {
114        Operand::Register(r) => *r,
115        _ => return false,
116    };
117
118    if dst_reg.size_bits() != 64 {
119        return false;
120    }
121
122    let imm = match &instr.operands[1] {
123        Operand::Immediate(v) => *v,
124        _ => return false,
125    };
126
127    // Only if the immediate fits in unsigned 32-bit (0..0xFFFFFFFF)
128    // This ensures the zero-extension from 32-bit produces the same 64-bit value
129    if !(0..=0xFFFF_FFFF).contains(&imm) {
130        return false;
131    }
132
133    if let Some(r32) = dst_reg.to_32bit() {
134        instr.operands[0] = Operand::Register(r32);
135        return true;
136    }
137
138    false
139}
140
141/// `and r64, imm` where imm fits u32 → `and r32, imm32`
142///
143/// The AND operation with a non-negative immediate that fits in 32 bits will
144/// always clear the upper 32 bits of the result, making it equivalent to the
145/// 32-bit AND followed by zero-extension. This saves the REX.W byte (1 byte).
146///
147/// This is safe because AND with a u32 immediate always produces a result
148/// with upper 32 bits = 0, regardless of the register's original value.
149/// Other ALU operations (ADD, SUB, OR, XOR) are NOT generally safe to narrow
150/// because they may depend on or produce upper bits.
151fn try_rex_elimination(instr: &mut Instruction, arch: Arch) -> bool {
152    if arch != Arch::X86_64 {
153        return false;
154    }
155    if instr.mnemonic != "and" {
156        return false;
157    }
158    if instr.operands.len() != 2 {
159        return false;
160    }
161
162    let dst_reg = match &instr.operands[0] {
163        Operand::Register(r) => *r,
164        _ => return false,
165    };
166
167    if dst_reg.size_bits() != 64 {
168        return false;
169    }
170
171    let imm = match &instr.operands[1] {
172        Operand::Immediate(v) => *v,
173        _ => return false,
174    };
175
176    // Only if the immediate fits in unsigned 32-bit (non-negative, ≤ 0xFFFFFFFF)
177    // AND with such a value always zeros the upper 32 bits.
178    if !(0..=0xFFFF_FFFF).contains(&imm) {
179        return false;
180    }
181
182    if let Some(r32) = dst_reg.to_32bit() {
183        instr.operands[0] = Operand::Register(r32);
184        return true;
185    }
186
187    false
188}
189
190/// `and reg, reg` (same register) → `test reg, reg`
191///
192/// Both set flags identically, but `test` doesn't write the destination
193/// register, which can improve out-of-order execution.
194fn try_test_conversion(instr: &mut Instruction) -> bool {
195    if instr.mnemonic != "and" {
196        return false;
197    }
198    if instr.operands.len() != 2 {
199        return false;
200    }
201
202    let r1 = match &instr.operands[0] {
203        Operand::Register(r) => r,
204        _ => return false,
205    };
206    let r2 = match &instr.operands[1] {
207        Operand::Register(r) => r,
208        _ => return false,
209    };
210
211    if r1 == r2 {
212        instr.mnemonic = Mnemonic::from("test");
213        return true;
214    }
215
216    false
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::Span;

    /// Build a bare instruction with the given mnemonic and operands;
    /// every other field takes a neutral default.
    fn make_instr(name: &str, operands: Vec<Operand>) -> Instruction {
        Instruction {
            mnemonic: Mnemonic::from(name),
            operands: OperandList::from(operands),
            size_hint: None,
            prefixes: PrefixList::new(),
            opmask: None,
            zeroing: false,
            broadcast: None,
            span: Span::dummy(),
        }
    }

    // ── Zero-idiom (`mov reg, 0` → `xor reg, reg`) ───────────

    #[test]
    fn zero_idiom_mov_eax_0() {
        let mut insn = make_instr(
            "mov",
            alloc::vec![Operand::Register(Register::Eax), Operand::Immediate(0)],
        );
        assert!(optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.mnemonic, "xor");
        assert_eq!(insn.operands[0], Operand::Register(Register::Eax));
        assert_eq!(insn.operands[1], Operand::Register(Register::Eax));
    }

    #[test]
    fn zero_idiom_mov_rax_0() {
        let mut insn = make_instr(
            "mov",
            alloc::vec![Operand::Register(Register::Rax), Operand::Immediate(0)],
        );
        assert!(optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.mnemonic, "xor");
        // The 64-bit destination is narrowed to its 32-bit alias for the
        // shorter encoding.
        assert_eq!(insn.operands[0], Operand::Register(Register::Eax));
    }

    #[test]
    fn zero_idiom_mov_r12_0() {
        let mut insn = make_instr(
            "mov",
            alloc::vec![Operand::Register(Register::R12), Operand::Immediate(0)],
        );
        assert!(optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.mnemonic, "xor");
        assert_eq!(insn.operands[0], Operand::Register(Register::R12d));
    }

    #[test]
    fn zero_idiom_not_applied_nonzero() {
        let mut insn = make_instr(
            "mov",
            alloc::vec![Operand::Register(Register::Eax), Operand::Immediate(1)],
        );
        assert!(!optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.mnemonic, "mov");
    }

    #[test]
    fn zero_idiom_not_applied_8bit() {
        let mut insn = make_instr(
            "mov",
            alloc::vec![Operand::Register(Register::Al), Operand::Immediate(0)],
        );
        assert!(!optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.mnemonic, "mov");
    }

    // ── Immediate narrowing (`mov r64, imm32` → `mov r32, imm32`) ──

    #[test]
    fn mov_imm32_narrow_rax_1() {
        let mut insn = make_instr(
            "mov",
            alloc::vec![Operand::Register(Register::Rax), Operand::Immediate(1)],
        );
        assert!(optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.operands[0], Operand::Register(Register::Eax));
        assert_eq!(insn.operands[1], Operand::Immediate(1));
    }

    #[test]
    fn mov_imm32_narrow_rax_max_u32() {
        let mut insn = make_instr(
            "mov",
            alloc::vec![
                Operand::Register(Register::Rax),
                Operand::Immediate(0xFFFF_FFFF),
            ],
        );
        assert!(optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.operands[0], Operand::Register(Register::Eax));
    }

    #[test]
    fn mov_imm32_narrow_not_applied_negative() {
        // -1 needs the sign-extending 64-bit form (REX.W + imm32):
        // zero-extending from 32 bits would change the value.
        let mut insn = make_instr(
            "mov",
            alloc::vec![Operand::Register(Register::Rax), Operand::Immediate(-1)],
        );
        assert!(!optimize_instruction(&mut insn, Arch::X86_64));
    }

    #[test]
    fn mov_imm32_narrow_not_applied_large() {
        let mut insn = make_instr(
            "mov",
            alloc::vec![
                Operand::Register(Register::Rax),
                Operand::Immediate(0x1_0000_0000),
            ],
        );
        assert!(!optimize_instruction(&mut insn, Arch::X86_64));
    }

    // ── Test conversion (`and reg, reg` → `test reg, reg`) ───

    #[test]
    fn test_conversion_and_self() {
        let mut insn = make_instr(
            "and",
            alloc::vec![
                Operand::Register(Register::Eax),
                Operand::Register(Register::Eax),
            ],
        );
        assert!(optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.mnemonic, "test");
    }

    #[test]
    fn test_conversion_not_applied_different_regs() {
        let mut insn = make_instr(
            "and",
            alloc::vec![
                Operand::Register(Register::Eax),
                Operand::Register(Register::Ebx),
            ],
        );
        assert!(!optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.mnemonic, "and");
    }

    // ── REX elimination (`and r64, imm32` → `and r32, imm32`) ──

    #[test]
    fn rex_elim_and_rax_0xff() {
        // and rax, 0xFF → and eax, 0xFF (drops the REX.W prefix byte)
        let mut insn = make_instr(
            "and",
            alloc::vec![Operand::Register(Register::Rax), Operand::Immediate(0xFF)],
        );
        assert!(optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.mnemonic, "and");
        assert_eq!(insn.operands[0], Operand::Register(Register::Eax));
        assert_eq!(insn.operands[1], Operand::Immediate(0xFF));
    }

    #[test]
    fn rex_elim_and_r12_u32_max() {
        // and r12, 0xFFFFFFFF → and r12d, 0xFFFFFFFF
        let mut insn = make_instr(
            "and",
            alloc::vec![
                Operand::Register(Register::R12),
                Operand::Immediate(0xFFFF_FFFF),
            ],
        );
        assert!(optimize_instruction(&mut insn, Arch::X86_64));
        assert_eq!(insn.operands[0], Operand::Register(Register::R12d));
    }

    #[test]
    fn rex_elim_and_not_applied_negative() {
        // and rax, -1 is NOT narrowed (negative imm needs sign-extension).
        let mut insn = make_instr(
            "and",
            alloc::vec![Operand::Register(Register::Rax), Operand::Immediate(-1)],
        );
        assert!(!optimize_instruction(&mut insn, Arch::X86_64));
    }

    #[test]
    fn rex_elim_and_not_applied_large() {
        // and rax, 0x100000000 is NOT narrowed (exceeds u32).
        let mut insn = make_instr(
            "and",
            alloc::vec![
                Operand::Register(Register::Rax),
                Operand::Immediate(0x1_0000_0000),
            ],
        );
        assert!(!optimize_instruction(&mut insn, Arch::X86_64));
    }

    #[test]
    fn rex_elim_not_applied_to_add() {
        // `and rax, 5` SHOULD narrow to `and eax, 5`...
        let mut and_insn = make_instr(
            "and",
            alloc::vec![Operand::Register(Register::Rax), Operand::Immediate(5)],
        );
        assert!(optimize_instruction(&mut and_insn, Arch::X86_64));

        // ...but `add rax, 5` must NOT narrow: addition can carry into
        // the upper 32 bits, so the 32-bit form is not equivalent.
        let mut add_insn = make_instr(
            "add",
            alloc::vec![Operand::Register(Register::Rax), Operand::Immediate(5)],
        );
        assert!(!optimize_instruction(&mut add_insn, Arch::X86_64));
    }

    #[test]
    fn rex_elim_not_applied_32bit_arch() {
        let mut insn = make_instr(
            "and",
            alloc::vec![Operand::Register(Register::Eax), Operand::Immediate(0xFF)],
        );
        assert!(!optimize_instruction(&mut insn, Arch::X86));
    }
}