sp1_jit/backends/x86/instruction_impl.rs
1#![allow(clippy::fn_to_numeric_cast)]
2
3use super::{TranspilerBackend, CONTEXT, MEMORY_PTR, PC_OFFSET, TEMP_A, TEMP_B};
4use crate::{
5 impl_alu32_imm_opt, impl_alu_imm_opt, impl_risc_alu, impl_shift32_imm_opt, ComputeInstructions,
6 ControlFlowInstructions, JitContext, MemoryInstructions, RiscOperand, RiscRegister,
7 RiscvTranspiler, SystemInstructions,
8};
9use dynasmrt::{dynasm, x64::Rq, DynasmApi, DynasmLabelApi};
10
11impl ComputeInstructions for TranspilerBackend {
/// 64-bit addition: `rd <- rs1 + rs2`.
fn add(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // rd <- rs1 + rs2 (64-bit)
    // Code generation is delegated to `impl_alu_imm_opt!` with the x86 `add` mnemonic.
    // NOTE(review): judging by its name the macro has a fast path for immediate operands —
    // confirm against the macro definition.
    impl_alu_imm_opt!(self, rd, rs1, rs2, add);
}
16
/// 64-bit multiplication: `rd <- rs1 * rs2` (low 64 bits of the product).
fn mul(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // rd <- rs1 * rs2 (64-bit)
    // NOTE(review): presumably `impl_risc_alu!` loads rs1/rs2 into TEMP_A/TEMP_B before the
    // body below and stores TEMP_A into rd afterwards — confirm against the macro definition.
    impl_risc_alu!(self, rd, rs1, rs2, TEMP_A, TEMP_B, {
        dynasm! {
            self;
            .arch x64;
            // Two-operand form: TEMP_A <- low 64 bits of TEMP_A * TEMP_B.
            imul Rq(TEMP_A), Rq(TEMP_B)
        }
    })
}
27
/// 64-bit bitwise AND: `rd <- rs1 & rs2`.
fn and(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // rd <- rs1 & rs2 (64-bit)
    // Code generation is delegated to `impl_alu_imm_opt!` with the x86 `and` mnemonic.
    impl_alu_imm_opt!(self, rd, rs1, rs2, and);
}
32
/// 64-bit bitwise OR: `rd <- rs1 | rs2`.
fn or(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // rd <- rs1 | rs2 (64-bit)
    // Code generation is delegated to `impl_alu_imm_opt!` with the x86 `or` mnemonic.
    impl_alu_imm_opt!(self, rd, rs1, rs2, or);
}
37
/// 64-bit bitwise XOR: `rd <- rs1 ^ rs2`.
fn xor(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // rd <- rs1 ^ rs2 (64-bit)
    // Code generation is delegated to `impl_alu_imm_opt!` with the x86 `xor` mnemonic.
    impl_alu_imm_opt!(self, rd, rs1, rs2, xor);
}
42
/// Signed 64-bit division: `rd <- rs1 / rs2`.
///
/// Two cases are answered without executing `idiv`:
/// * `rs2 == 0`: the result is `-1` (all bits set).
/// * `rs1 == i64::MIN` and `rs2 == -1`: the result is `i64::MIN`.
///
/// The emitted code writes RAX, RCX and RDX.
fn div(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // x86 `idiv` divides the 128-bit value RDX:RAX by its operand, so the dividend is
    // loaded into RAX and sign-extended into RDX (`cqo`) before dividing.
    //
    // The quotient is left in RAX and the remainder in RDX.
    //
    // Only the quotient is written back to rd; the remainder is discarded.
    self.emit_risc_operand_load(rs1, Rq::RAX as u8); // Load dividend directly into RAX
    self.emit_risc_operand_load(rs2, TEMP_B);
    dynasm! {
        self;
        .arch x64;

        // ------------------------------------
        // 1. Division by zero: skip the `idiv` entirely
        // ------------------------------------
        test Rq(TEMP_B), Rq(TEMP_B);        // ZF=1 if rs2 == 0
        jz >div_by_zero;

        // Check for signed overflow (i64::MIN / -1).
        // `cmp` has no 64-bit immediate form, so i64::MIN goes through RCX first.
        mov rcx, -9223372036854775808;      // i64::MIN
        cmp rax, rcx;
        jne >no_overflow;
        cmp Rq(TEMP_B), -1;
        jne >no_overflow;

        // ------------------------------------
        // 2. Overflow: i64::MIN / -1 = i64::MIN (wrapping)
        // ------------------------------------
        mov rax, -9223372036854775808;      // Result is i64::MIN
        jmp >done;

        no_overflow:;
        // ------------------------------------
        // 3. Perform the signed divide
        // ------------------------------------
        // dividend already in RAX (loaded directly)
        cqo;                     // sign-extend RAX into RDX (64-bit)
        idiv Rq(TEMP_B);         // quotient → RAX, remainder → RDX
        // quotient already in RAX
        jmp >done;

        // ------------------------------------
        // 4. rs2 == 0
        // ------------------------------------
        div_by_zero:;
        mov rax, -1;             // quotient = -1 (RISC-V result for signed division by zero)

        done:
    }
    self.emit_risc_register_store(Rq::RAX as u8, None, rd);
}
95
/// Unsigned 64-bit division: `rd <- rs1 / rs2`, or `u64::MAX` when `rs2 == 0`.
///
/// The emitted code writes RAX and RDX.
fn divu(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // rd <- rs1 / rs2 (unsigned 64-bit; u64::MAX if rs2 == 0)
    // clobbers: RAX, RDX
    self.emit_risc_operand_load(rs1, Rq::RAX as u8); // Load dividend directly into RAX
    self.emit_risc_operand_load(rs2, TEMP_B);
    dynasm! {
        self;
        .arch x64;

        // ----- division by zero: skip the `div` entirely -----
        test Rq(TEMP_B), Rq(TEMP_B);    // ZF = 1 when rs2 == 0
        jz   >div_by_zero;

        // ----- perform unsigned divide -----
        // dividend already in RAX (loaded directly)
        xor  rdx, rdx;                  // high half of the 128-bit dividend: RDX = 0
        div  Rq(TEMP_B);                // unsigned divide: RDX:RAX / rs2
        // quotient already in RAX
        jmp  >done;

        // ----- rs2 == 0 -----
        div_by_zero:;
        mov  rax, -1;                   // quotient = u64::MAX (0xFFFFFFFFFFFFFFFF)

        done:
    }
    self.emit_risc_register_store(Rq::RAX as u8, None, rd);
}
124
125 fn mulh(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
126 // Signed multiply high: returns upper 64 bits of rs1 * rs2
127 // x86 imul for high multiply requires RAX and produces result in RDX
128 self.emit_risc_operand_load(rs1, Rq::RAX as u8); // Load multiplicand directly into RAX
129 self.emit_risc_operand_load(rs2, TEMP_B);
130 dynasm! {
131 self;
132 .arch x64;
133
134 // multiplicand already in RAX (loaded directly)
135 imul Rq(TEMP_B) // signed 64×64 → 128; high → RDX
136 // high 64 bits already in RDX
137 }
138 self.emit_risc_register_store(Rq::RDX as u8, None, rd);
139 }
140
141 fn mulhu(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
142 // Unsigned multiply high: returns upper 64 bits of rs1 * rs2
143 // x86 mul for high multiply requires RAX and produces result in RDX
144 self.emit_risc_operand_load(rs1, Rq::RAX as u8); // Load multiplicand directly into RAX
145 self.emit_risc_operand_load(rs2, TEMP_B);
146 dynasm! {
147 self;
148 .arch x64;
149
150 // multiplicand already in RAX (loaded directly)
151 mul Rq(TEMP_B) // unsigned 64×64 → 128; high → RDX
152 // high 64 bits already in RDX
153 }
154 self.emit_risc_register_store(Rq::RDX as u8, None, rd);
155 }
156
/// Mixed multiply-high: `rd <- upper 64 bits of (signed rs1 * unsigned rs2)`.
///
/// Implemented as an unsigned multiply followed by a correction when rs1 is negative.
/// The emitted code writes RAX, RCX and RDX.
fn mulhsu(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // Mixed multiply high: signed rs1 * unsigned rs2, returns upper 64 bits
    self.emit_risc_operand_load(rs1, Rq::RAX as u8); // Load signed multiplicand directly into RAX
    self.emit_risc_operand_load(rs2, TEMP_B);
    dynasm! {
        self;
        .arch x64;

        // ──────────────────────────────────────────────────────────────
        // 1. The signed multiplicand (rs1) is already in RAX.
        //    ✦ `mul` always uses RAX as its implicit source operand, which is
        //      why rs1 was loaded there directly above.
        // ──────────────────────────────────────────────────────────────

        // ──────────────────────────────────────────────────────────────
        // 2. Keep a copy of rs1 in RCX.
        //    ✦ `mul` overwrites both RAX and RDX, so the sign of rs1 would be
        //      lost. The copy in RCX is tested afterwards to decide whether
        //      the fix-up for a negative multiplicand is needed.
        // ──────────────────────────────────────────────────────────────
        mov rcx, rax;

        // ──────────────────────────────────────────────────────────────
        // 3. Unsigned 64×64-bit multiply:
        //      mul Rq(TEMP_B)
        //    ✦ Computes RDX:RAX = (unsigned)RAX × (unsigned)TEMP_B.
        //      The high 64 bits of the 128-bit product land in RDX.
        // ──────────────────────────────────────────────────────────────
        mul Rq(TEMP_B);

        // ──────────────────────────────────────────────────────────────
        // 4. Was rs1 negative?
        //    ✦ `test rcx, rcx` sets the sign flag from the saved copy of rs1.
        //    ✦ If the sign flag is clear (rs1 ≥ 0) the unsigned product is
        //      already the signed × unsigned product, so skip the fix-up.
        // ──────────────────────────────────────────────────────────────
        test rcx, rcx;
        jns >store_high;                // Jump if rs1 was non-negative.

        // ──────────────────────────────────────────────────────────────
        // 5. Fix-up for negative rs1:
        //    ✦ The unsigned `mul` interpreted rs1 as rs1 + 2⁶⁴, so the 128-bit
        //      product is too large by 2⁶⁴ × TEMP_B, i.e. the high word is too
        //      large by exactly TEMP_B. Subtracting TEMP_B from RDX yields the
        //      correct high word.
        // ──────────────────────────────────────────────────────────────
        sub rdx, Rq(TEMP_B);

        // ──────────────────────────────────────────────────────────────
        // 6. RDX now holds the result; it is stored to rd after this block.
        // ──────────────────────────────────────────────────────────────
        store_high:
        // result already in RDX
    }
    self.emit_risc_register_store(Rq::RDX as u8, None, rd);
}
217
/// Signed remainder: `rd = rs1 % rs2`
/// *RISC-V rule*: if `rs2 == 0`, the result is the **dividend** (`rs1`), with no fault.
/// If `rs1 == i64::MIN` and `rs2 == -1`, the result is `0`.
///
/// The instruction body writes RAX, RCX and RDX in addition to TEMP_A.
fn rem(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // NOTE(review): presumably `impl_risc_alu!` loads rs1/rs2 into TEMP_A/TEMP_B before the
    // body below and stores TEMP_A into rd afterwards — confirm against the macro definition.
    impl_risc_alu!(self, rd, rs1, rs2, TEMP_A, TEMP_B, {
        dynasm! {
            self;
            .arch x64;

            // ──────────────────────────────────────────────────────────────
            // 0. Guard: if divisor is 0, skip the IDIV and return dividend
            // ──────────────────────────────────────────────────────────────
            test Rq(TEMP_B), Rq(TEMP_B);      // ZF = 1 ⇒ TEMP_B == 0
            jz   >by_zero;                    // jump to the divide-by-zero path

            // ──────────────────────────────────────────────────────────────
            // 1. Check for signed overflow (i64::MIN % -1)
            //    `cmp` has no 64-bit immediate form, so i64::MIN goes through RCX.
            // ──────────────────────────────────────────────────────────────
            mov rcx, -9223372036854775808;    // Load i64::MIN into RCX
            cmp Rq(TEMP_A), rcx;              // Check if dividend == i64::MIN
            jne >no_overflow;
            cmp Rq(TEMP_B), -1;               // Check if divisor == -1
            jne >no_overflow;

            // ──────────────────────────────────────────────────────────────
            // Handle overflow: i64::MIN % -1 = 0 (wrapping)
            // ──────────────────────────────────────────────────────────────
            xor Rq(TEMP_A), Rq(TEMP_A);       // TEMP_A = 0
            jmp >done;

            no_overflow:;
            // ──────────────────────────────────────────────────────────────
            // 2. Prepare the **signed** 128-bit dividend in RDX:RAX
            //    -------------------------------------------------
            //    • RAX ← TEMP_A
            //    • CQO sign-extends RAX into RDX
            //      → RDX:RAX now holds the two's-complement value a
            // ──────────────────────────────────────────────────────────────
            mov rax, Rq(TEMP_A);              // RAX = a  (signed 64-bit)
            cqo;                              // RDX = sign(a)

            // ──────────────────────────────────────────────────────────────
            // 3. Signed divide:  a / b
            //    -------------------------------------------------
            //    • idiv r/m64 performs (RDX:RAX) ÷ TEMP_B
            //      – Quotient  → RAX (ignored)
            //      – Remainder → RDX (what RISC-V REM returns)
            // ──────────────────────────────────────────────────────────────
            idiv Rq(TEMP_B);                  // signed divide

            // ──────────────────────────────────────────────────────────────
            // 4. Move the remainder (RDX) into TEMP_A, the result register
            // ──────────────────────────────────────────────────────────────
            mov  Rq(TEMP_A), rdx;             // TEMP_A = remainder
            jmp >done;

            // ──────────────────────────────────────────────────────────────
            // Divisor == 0 → result must be dividend (RISC-V spec)
            // ──────────────────────────────────────────────────────────────
            by_zero:;
            // TEMP_A already contains the dividend, no change needed

            done:
        }
    })
}
283
/// Unsigned remainder: `rd = rs1 % rs2`; if `rs2 == 0` the result is the dividend (`rs1`).
///
/// The instruction body writes RAX and RDX in addition to TEMP_A.
fn remu(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // NOTE(review): presumably `impl_risc_alu!` loads rs1/rs2 into TEMP_A/TEMP_B before the
    // body below and stores TEMP_A into rd afterwards — confirm against the macro definition.
    impl_risc_alu!(self, rd, rs1, rs2, TEMP_A, TEMP_B, {
        dynasm! {
            self;
            .arch x64;

            // ──────────────────────────────────────────────────────────────
            // 0. Guard against /0  → result = dividend (TEMP_A)
            // ──────────────────────────────────────────────────────────────
            test Rq(TEMP_B), Rq(TEMP_B);
            jz   >by_zero;

            // ──────────────────────────────────────────────────────────────
            // 1. Prepare the **unsigned** 128-bit dividend in RDX:RAX
            //    -------------------------------------------------
            //    • Zero-extend TEMP_A into RDX:RAX.
            // ──────────────────────────────────────────────────────────────
            mov rax, Rq(TEMP_A);
            xor rdx, rdx;

            // ──────────────────────────────────────────────────────────────
            // 2. Unsigned divide:  a / b
            //    -------------------------------------------------
            //    • div r/m64 performs (RDX:RAX) ÷ TEMP_B
            //      – Quotient  → RAX (unused)
            //      – Remainder → RDX (what RISC-V REMU wants)
            // ──────────────────────────────────────────────────────────────
            div Rq(TEMP_B);

            // ──────────────────────────────────────────────────────────────
            // 3. Move the remainder into TEMP_A, the result register.
            // ──────────────────────────────────────────────────────────────
            mov Rq(TEMP_A), rdx;
            jmp >done;

            // ──────────────────────────────────────────────────────────────
            // Divisor == 0 → result must be dividend (RISC-V spec)
            // ──────────────────────────────────────────────────────────────
            by_zero:;
            // TEMP_A already contains the dividend, no change needed

            done:
        }
    })
}
329
330 fn sll(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
331 // We only can use the lower 6 bits for the shift count in 64-bit mode.
332 // In RV64I, this is also true!
333 //
334 // CL is an alias for the lower byte of RCX.
335 match rs2 {
336 RiscOperand::Immediate(imm) => {
337 self.emit_risc_operand_load(rs1, TEMP_A);
338 dynasm! {
339 self;
340 .arch x64;
341 // Direct immediate shift (lower 6 bits automatically masked by x86)
342 shl Rq(TEMP_A), (imm & 0x3F) as i8
343 }
344 self.emit_risc_register_store(TEMP_A, None, rd);
345 }
346 _ => {
347 self.emit_risc_operand_load(rs1, TEMP_A);
348 self.emit_risc_operand_load(rs2, Rq::RCX as u8);
349 dynasm! {
350 self;
351 .arch x64;
352 // ──────────────────────────────────────────────────────────────
353 // 1. Shift count is already in RCX (loaded directly).
354 // • Only the low 6 bits are used for 64-bit operands,
355 // which matches the RISC-V spec for RV64.
356 // ──────────────────────────────────────────────────────────────
357
358 // ──────────────────────────────────────────────────────────────
359 // 2. Logical left shift:
360 // Rq(TEMP_A) ← Rq(TEMP_A) << (CL & 0x3F)
361 // • `shl` fills zeros from the right as it shifts left.
362 // ──────────────────────────────────────────────────────────────
363 shl Rq(TEMP_A), cl // variable-count shift
364 }
365 self.emit_risc_register_store(TEMP_A, None, rd);
366 }
367 }
368 }
369
370 fn sra(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
371 match rs2 {
372 RiscOperand::Immediate(imm) => {
373 self.emit_risc_operand_load(rs1, TEMP_A);
374 dynasm! {
375 self;
376 .arch x64;
377 // Direct immediate arithmetic right shift
378 sar Rq(TEMP_A), (imm & 0x3F) as i8
379 }
380 self.emit_risc_register_store(TEMP_A, None, rd);
381 }
382 _ => {
383 self.emit_risc_operand_load(rs1, TEMP_A);
384 self.emit_risc_operand_load(rs2, Rq::RCX as u8);
385 dynasm! {
386 self;
387 .arch x64;
388 // ──────────────────────────────────────────────────────────────
389 // 1. Shift count is already in RCX (loaded directly).
390 // • Only the low 6 bits are used for 64-bit operands,
391 // which matches the RISC-V spec for RV64.
392 // ──────────────────────────────────────────────────────────────
393
394 // ──────────────────────────────────────────────────────────────
395 // 2. Arithmetic right shift:
396 // Rq(TEMP_A) ← (signed)Rq(TEMP_A) >> (CL & 0x3F)
397 // • `sar` replicates the sign bit as it shifts, so
398 // negative values stay negative after the operation.
399 // ──────────────────────────────────────────────────────────────
400 sar Rq(TEMP_A), cl // variable-count shift
401 }
402 self.emit_risc_register_store(TEMP_A, None, rd);
403 }
404 }
405 }
406
407 fn srl(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
408 match rs2 {
409 RiscOperand::Immediate(imm) => {
410 self.emit_risc_operand_load(rs1, TEMP_A);
411 dynasm! {
412 self;
413 .arch x64;
414 // Direct immediate logical right shift
415 shr Rq(TEMP_A), (imm & 0x3F) as i8
416 }
417 self.emit_risc_register_store(TEMP_A, None, rd);
418 }
419 _ => {
420 self.emit_risc_operand_load(rs1, TEMP_A);
421 self.emit_risc_operand_load(rs2, Rq::RCX as u8);
422 dynasm! {
423 self;
424 .arch x64;
425 // ──────────────────────────────────────────────────────────────
426 // 1. Shift count is already in RCX (loaded directly).
427 // ──────────────────────────────────────────────────────────────
428
429 // ──────────────────────────────────────────────────────────────
430 // 2. Logical right shift:
431 // Rq(TEMP_A) ← (unsigned)Rq(TEMP_A) >> (CL & 0x3F)
432 // • `shr` always inserts zeros from the left, regardless
433 // of the operand's sign.
434 // ──────────────────────────────────────────────────────────────
435 shr Rq(TEMP_A), cl
436 }
437 self.emit_risc_register_store(TEMP_A, None, rd);
438 }
439 }
440 }
441
442 fn slt(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
443 match rs2 {
444 RiscOperand::Immediate(imm) => {
445 self.emit_risc_operand_load(rs1, TEMP_A);
446 dynasm! {
447 self;
448 .arch x64;
449
450 cmp Rq(TEMP_A), imm;
451
452 // ──────────────────────────────────────────────────────────────
453 // 2. setl r/m8
454 // • Writes 1 to the target byte if (SF ≠ OF)
455 // which is the signed "less than" condition.
456 // • We store straight into the low-byte of TEMP_A —
457 // dynasm's `Rb()` gives us that alias.
458 // ──────────────────────────────────────────────────────────────
459 setl Rb(TEMP_A); // byte = 1 if TEMP_A < imm (signed)
460
461 // ──────────────────────────────────────────────────────────────
462 // 3. Zero-extend that byte back to a full 32-bit register so
463 // that the RISC register ends up with 0x0000_0000 or 0x0000_0001.
464 // ──────────────────────────────────────────────────────────────
465 movzx Rq(TEMP_A), Rb(TEMP_A) // Rd(TEMP_A) = 0 or 1
466 }
467 self.emit_risc_register_store(TEMP_A, None, rd);
468 }
469 _ => {
470 self.emit_risc_operand_load(rs1, TEMP_A);
471 self.emit_risc_operand_load(rs2, TEMP_B);
472 dynasm! {
473 self;
474 .arch x64;
475
476 cmp Rq(TEMP_A), Rq(TEMP_B);
477
478 // ──────────────────────────────────────────────────────────────
479 // 2. setl r/m8
480 // • Writes 1 to the target byte if (SF ≠ OF)
481 // which is the signed "less than" condition.
482 // • We store straight into the low-byte of TEMP_A —
483 // dynasm's `Rb()` gives us that alias.
484 // ──────────────────────────────────────────────────────────────
485 setl Rb(TEMP_A); // byte = 1 if TEMP_A < TEMP_B (signed)
486
487 // ──────────────────────────────────────────────────────────────
488 // 3. Zero-extend that byte back to a full 32-bit register so
489 // that the RISC register ends up with 0x0000_0000 or 0x0000_0001.
490 // ──────────────────────────────────────────────────────────────
491 movzx Rq(TEMP_A), Rb(TEMP_A) // Rd(TEMP_A) = 0 or 1
492 }
493 self.emit_risc_register_store(TEMP_A, None, rd);
494 }
495 }
496 }
497
498 fn sltu(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
499 match rs2 {
500 RiscOperand::Immediate(imm) => {
501 self.emit_risc_operand_load(rs1, TEMP_A);
502 dynasm! {
503 self;
504 .arch x64;
505
506 cmp Rq(TEMP_A), imm;
507
508 // ------------------------------------
509 // `setb` ("below") checks the Carry Flag (CF):
510 // CF = 1 iff TEMP_A < imm in an *unsigned* sense.
511 // ------------------------------------
512 setb Rb(TEMP_A);
513
514 // ------------------------------------
515 // Zero-extend to 32 bits (0 or 1).
516 // ------------------------------------
517 movzx Rq(TEMP_A), Rb(TEMP_A)
518 }
519 self.emit_risc_register_store(TEMP_A, None, rd);
520 }
521 _ => {
522 self.emit_risc_operand_load(rs1, TEMP_A);
523 self.emit_risc_operand_load(rs2, TEMP_B);
524 dynasm! {
525 self;
526 .arch x64;
527
528 cmp Rq(TEMP_A), Rq(TEMP_B);
529
530 // ------------------------------------
531 // `setb` ("below") checks the Carry Flag (CF):
532 // CF = 1 iff TEMP_A < TEMP_B in an *unsigned* sense.
533 // ------------------------------------
534 setb Rb(TEMP_A);
535
536 // ------------------------------------
537 // Zero-extend to 32 bits (0 or 1).
538 // ------------------------------------
539 movzx Rq(TEMP_A), Rb(TEMP_A)
540 }
541 self.emit_risc_register_store(TEMP_A, None, rd);
542 }
543 }
544 }
545
/// 64-bit subtraction: `rd <- rs1 - rs2`.
fn sub(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // rd <- rs1 - rs2 (64-bit)
    // NOTE(review): presumably `impl_risc_alu!` loads rs1/rs2 into TEMP_A/TEMP_B before the
    // body below and stores TEMP_A into rd afterwards — confirm against the macro definition.
    impl_risc_alu!(self, rd, rs1, rs2, TEMP_A, TEMP_B, {
        dynasm! {
            self;
            .arch x64;
            // TEMP_A <- TEMP_A - TEMP_B
            sub Rq(TEMP_A), Rq(TEMP_B)
        }
    })
}
556
/// 32-bit addition with the result sign-extended to 64 bits.
fn addw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // addw performs 32-bit addition on lower 32 bits, then sign-extends result to 64 bits
    // Code generation is delegated to `impl_alu32_imm_opt!` with the x86 `add` mnemonic.
    impl_alu32_imm_opt!(self, rd, rs1, rs2, add);
}
561
/// 32-bit subtraction with the result sign-extended to 64 bits.
fn subw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // subw performs 32-bit subtraction on lower 32 bits, then sign-extends result to 64 bits
    // NOTE(review): presumably `impl_risc_alu!` loads rs1/rs2 into TEMP_A/TEMP_B before the
    // body below and stores TEMP_A into rd afterwards — confirm against the macro definition.
    impl_risc_alu!(self, rd, rs1, rs2, TEMP_A, TEMP_B, {
        dynasm! {
            self;
            .arch x64;
            // 32-bit subtract on the low halves of the scratch registers.
            sub Rd(TEMP_A), Rd(TEMP_B);
            // Sign-extend the 32-bit difference to 64 bits.
            movsxd Rq(TEMP_A), Rd(TEMP_A)
        }
    })
}
573
/// 32-bit logical left shift with the result sign-extended to 64 bits.
fn sllw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // sllw performs 32-bit shift left, then sign-extends result to 64 bits
    // Code generation is delegated to `impl_shift32_imm_opt!` with the x86 `shl` mnemonic.
    impl_shift32_imm_opt!(self, rd, rs1, rs2, shl);
}
578
/// 32-bit logical right shift with the result sign-extended to 64 bits.
fn srlw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // srlw performs logical right shift on lower 32 bits, then sign-extends result to 64 bits
    // Code generation is delegated to `impl_shift32_imm_opt!` with the x86 `shr` mnemonic.
    impl_shift32_imm_opt!(self, rd, rs1, rs2, shr);
}
583
/// 32-bit arithmetic right shift with the result sign-extended to 64 bits.
fn sraw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // sraw performs arithmetic right shift on lower 32 bits, then sign-extends result to 64
    // bits
    // Code generation is delegated to `impl_shift32_imm_opt!` with the x86 `sar` mnemonic.
    impl_shift32_imm_opt!(self, rd, rs1, rs2, sar);
}
589
/// 32-bit multiplication with the result sign-extended to 64 bits.
fn mulw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // mulw performs 32-bit multiplication, then sign-extends result to 64 bits
    // NOTE(review): presumably `impl_risc_alu!` loads rs1/rs2 into TEMP_A/TEMP_B before the
    // body below and stores TEMP_A into rd afterwards — confirm against the macro definition.
    impl_risc_alu!(self, rd, rs1, rs2, TEMP_A, TEMP_B, {
        dynasm! {
            self;
            .arch x64;

            // Perform 32-bit multiplication (low 32 bits of the product are kept)
            imul Rd(TEMP_A), Rd(TEMP_B);

            // Sign-extend the 32-bit result to 64 bits
            movsxd Rq(TEMP_A), Rd(TEMP_A)
        }
    });
}
605
606 fn divw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
607 // divw performs 32-bit signed division, then sign-extends result to 64 bits
608 self.emit_risc_operand_load(rs1, Rq::RAX as u8); // Load dividend directly into RAX
609 self.emit_risc_operand_load(rs2, TEMP_B);
610 dynasm! {
611 self;
612 .arch x64;
613
614 // Check for division by zero
615 test Rd(TEMP_B), Rd(TEMP_B);
616 jz >div_by_zero;
617
618 // Handle 32-bit overflow case on x86-64: INT_MIN / -1 traps (#DE)
619 cmp eax, i32::MIN; // dividend == INT_MIN?
620 jne >do_div;
621 cmp Rd(TEMP_B), -1; // divisor == -1?
622 jne >do_div;
623 mov eax, i32::MIN; // result = INT_MIN
624 movsxd rax, eax; // sign-extend to 64-bit
625 jmp >done;
626
627 do_div:;
628 // Perform signed 32-bit divide
629 // dividend already in EAX (loaded directly into RAX)
630 cdq; // sign-extend EAX into EDX
631 idiv Rd(TEMP_B); // quotient → EAX
632 movsxd rax, eax; // sign-extend result to 64 bits
633 jmp >done;
634
635 // Handle overflow: i32::MIN / -1 = i32::MIN (wrapping)
636 overflow:;
637 mov rax, i32::MIN;
638 jmp >done;
639
640 div_by_zero:;
641 // For RV64I, divw by zero returns 0xFFFFFFFFFFFFFFFF (-1 sign-extended)
642 mov rax, -1;
643
644 done:
645 }
646 self.emit_risc_register_store(Rq::RAX as u8, None, rd);
647 }
648
/// Unsigned 32-bit division on the low halves of the operands; the 32-bit quotient is
/// sign-extended to 64 bits and written to `rd`. If the low 32 bits of `rs2` are zero the
/// result is all ones.
///
/// The emitted code writes RAX and RDX.
fn divuw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // divuw performs 32-bit unsigned division, then sign-extends result to 64 bits
    self.emit_risc_operand_load(rs1, Rq::RAX as u8); // Load dividend directly into RAX
    self.emit_risc_operand_load(rs2, TEMP_B);
    dynasm! {
        self;
        .arch x64;

        // Check for division by zero (only the low 32 bits of the divisor matter)
        test Rd(TEMP_B), Rd(TEMP_B);
        jz >div_by_zero;

        // Perform unsigned 32-bit divide
        // dividend already in EAX (loaded directly into RAX)
        xor edx, edx;              // high half of the 64-bit dividend EDX:EAX = 0
        div Rd(TEMP_B);            // quotient → EAX
        movsxd rax, eax;           // sign-extend result to 64 bits
        jmp >done;

        div_by_zero:;
        // divuw by zero returns 0xFFFFFFFFFFFFFFFF (-1 sign-extended)
        mov rax, -1;

        done:
    }
    self.emit_risc_register_store(Rq::RAX as u8, None, rd);
}
676
677 fn remw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
678 // remw performs 32-bit signed remainder, then sign-extends result to 64 bits
679 self.emit_risc_operand_load(rs1, Rq::RAX as u8); // Load dividend directly into RAX
680 self.emit_risc_operand_load(rs2, TEMP_B);
681 dynasm! {
682 self;
683 .arch x64;
684
685 // Check for division by zero
686 test Rd(TEMP_B), Rd(TEMP_B);
687 jz >rem_by_zero;
688
689 // Handle 32-bit overflow case on x86-64: INT_MIN / -1 traps (#DE)
690 cmp eax, i32::MIN; // dividend == INT_MIN?
691 jne >do_div;
692 cmp Rd(TEMP_B), -1; // divisor == -1?
693 jne >do_div;
694 mov eax, i32::MIN; // result = INT_MIN
695 movsxd rax, eax; // sign-extend to 64-bit
696 jmp >done;
697
698 do_div:;
699 // Perform signed 32-bit remainder
700 // dividend already in EAX (loaded directly into RAX)
701 cdq; // sign-extend EAX into EDX
702 idiv Rd(TEMP_B); // remainder → EDX
703 movsxd rdx, edx; // sign-extend result to 64 bits
704 jmp >done;
705
706 // Handle overflow: i32::MIN % -1 = 0 (wrapping)
707 overflow:;
708 xor rdx, rdx; // remainder = 0
709 jmp >done;
710
711 rem_by_zero:;
712 // For RV64I, remw by zero returns the dividend (RAX) sign-extended
713 movsxd rdx, eax;
714
715 done:
716 }
717 self.emit_risc_register_store(Rq::RDX as u8, None, rd);
718 }
719
/// Unsigned 32-bit remainder on the low halves of the operands; the 32-bit remainder is
/// sign-extended to 64 bits and written to `rd`. If the low 32 bits of `rs2` are zero the
/// result is the low 32 bits of `rs1`, sign-extended.
///
/// The emitted code writes RAX and RDX.
fn remuw(&mut self, rd: RiscRegister, rs1: RiscOperand, rs2: RiscOperand) {
    // remuw performs 32-bit unsigned remainder, then sign-extends result to 64 bits
    self.emit_risc_operand_load(rs1, Rq::RAX as u8); // Load dividend directly into RAX
    self.emit_risc_operand_load(rs2, TEMP_B);
    dynasm! {
        self;
        .arch x64;

        // Check for division by zero (only the low 32 bits of the divisor matter)
        test Rd(TEMP_B), Rd(TEMP_B);
        jz >rem_by_zero;

        // Perform unsigned 32-bit remainder
        // dividend already in EAX (loaded directly into RAX)
        xor edx, edx;              // high half of the 64-bit dividend EDX:EAX = 0
        div Rd(TEMP_B);            // remainder → EDX
        movsxd rdx, edx;           // sign-extend result to 64 bits
        jmp >done;

        rem_by_zero:;
        // remuw by zero returns the low 32 bits of the dividend (EAX), sign-extended
        movsxd rdx, eax;

        done:
    }
    self.emit_risc_register_store(Rq::RDX as u8, None, rd);
}
747
748 fn auipc(&mut self, rd: RiscRegister, imm: u64) {
749 // rd <- pc + imm
750
751 // ------------------------------------
752 // 1. Copy the current PC into TEMP_A
753 // 2. Increment the PC by the immediate.
754 // ------------------------------------
755 let value = self.pc_current.wrapping_add(imm);
756
757 // Store the result in the destination register.
758 self.emit_risc_register_store(TEMP_A, Some(value), rd);
759 }
760
/// Load upper immediate: writes `imm`, truncated to its low 32 bits, to `rd`.
fn lui(&mut self, rd: RiscRegister, imm: u64) {
    // No shift is emitted here: `imm` is moved into the register as-is.
    // NOTE(review): presumably `imm` arrives already shifted left by 12 from the decoder —
    // confirm against the callers.
    dynasm! {
        self;
        .arch x64;

        // `imm as i32` keeps only the low 32 bits of `imm`.
        // NOTE(review): a 64-bit `mov` with a 32-bit immediate should sign-extend it, which
        // is what RV64 LUI requires — confirm the encoding dynasm selects.
        mov Rq(TEMP_A), imm as i32
    }

    // Store the result in the destination register.
    self.emit_risc_register_store(TEMP_A, None, rd);
}
774}
775
776impl ControlFlowInstructions for TranspilerBackend {
777 fn jal(&mut self, rd: RiscRegister, imm: u64) {
778 // Mark that a control flow instruction has been inserted.
779 self.control_flow_instruction_inserted = true;
780
781 let target_pc = self.pc_current.wrapping_add(imm);
782 let next_pc = self.pc_current.wrapping_add(4);
783
784 // Store the current PC + 4 into the destination register.
785 self.emit_risc_register_store(TEMP_A, Some(next_pc), rd);
786
787 // Adjust the PC store in the context by the immediate.
788 self.update_pc(TEMP_B, target_pc);
789
790 // Add the base amount of cycles for the instruction.
791 self.bump_clk();
792
793 // We know the jump target at transpile time, we can issue jump
794 // to it directly, skipping jump table
795 self.end_branch(Some(target_pc));
796 }
797
798 fn jalr(&mut self, rd: RiscRegister, rs1: RiscRegister, imm: u64) {
799 // Mark that a control flow instruction has been inserted.
800 self.control_flow_instruction_inserted = true;
801
802 // ------------------------------------
803 // 1. If rs1 is immediate, we can do fast jumping
804 // ------------------------------------
805 let jump_target =
806 self.reg_values.get(&rs1).map(|rs1_imm| rs1_imm.wrapping_add(imm) & !1_u64);
807
808 // ------------------------------------
809 // 2. Update PC value
810 // ------------------------------------
811 self.emit_risc_operand_load(rs1.into(), TEMP_A);
812 dynasm! {
813 self;
814 .arch x64;
815
816 add Rq(TEMP_A), imm as i32;
817 and Rq(TEMP_A), -2;
818 mov QWORD [Rq(CONTEXT) + PC_OFFSET], Rq(TEMP_A)
819 }
820
821 // ------------------------------------
822 // 3. Compute & store next PC into rd.
823 // ------------------------------------
824 let next_pc = self.pc_current + 4;
825 self.emit_risc_register_store(TEMP_B, Some(next_pc), rd);
826
827 // Add the base amount of cycles for the instruction.
828 self.bump_clk();
829
830 self.end_branch(jump_target);
831 }
832
833 fn beq(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
834 // Mark that a control flow instruction has been inserted.
835 self.control_flow_instruction_inserted = true;
836
837 // Add the base amount of cycles for the instruction.
838 self.bump_clk();
839
840 self.emit_risc_operand_load(rs1.into(), TEMP_A);
841 self.emit_risc_operand_load(rs2.into(), TEMP_B);
842
843 let branched_target = self.pc_current.wrapping_add(imm);
844 let not_branched_target = self.pc_current.wrapping_add(4);
845
846 // Compare the registers
847 dynasm! {
848 self;
849 .arch x64;
850
851 // Check if rs1 == rs2
852 cmp Rq(TEMP_A), Rq(TEMP_B);
853 // If rs1 != rs2, jump to not_branched, since that would imply !(rs1 == rs2)
854 jne >not_branched
855 }
856 // ------------------------------------
857 // Branched:
858 // 0. Bump the pc by the immediate.
859 // ------------------------------------
860 self.update_pc(Rq::RAX as u8, branched_target);
861 self.end_branch(Some(branched_target));
862
863 dynasm! {
864 self;
865 .arch x64;
866
867 // ------------------------------------
868 // Not branched:
869 // ------------------------------------
870 not_branched:
871 }
872 // ------------------------------------
873 // 1. Bump the pc by 4
874 // ------------------------------------
875 self.update_pc(Rq::RAX as u8, not_branched_target);
876 self.end_branch(Some(not_branched_target));
877 }
878
879 fn bge(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
880 // Mark that a control flow instruction has been inserted.
881 self.control_flow_instruction_inserted = true;
882
883 // Add the base amount of cycles for the instruction.
884 self.bump_clk();
885
886 self.emit_risc_operand_load(rs1.into(), TEMP_A);
887 self.emit_risc_operand_load(rs2.into(), TEMP_B);
888
889 let branched_target = self.pc_current.wrapping_add(imm);
890 let not_branched_target = self.pc_current.wrapping_add(4);
891
892 dynasm! {
893 self;
894 .arch x64;
895
896 // Check if rs1 == rs2
897 cmp Rq(TEMP_A), Rq(TEMP_B);
898 // If rs1 < rs2, jump to not_branched, since that would imply !(rs1 >= rs2)
899 jl >not_branched
900 }
901 // ------------------------------------
902 // Branched:
903 // 0. Bump the pc by the immediate.
904 // ------------------------------------
905 self.update_pc(Rq::RAX as u8, branched_target);
906 self.end_branch(Some(branched_target));
907
908 dynasm! {
909 self;
910 .arch x64;
911
912 // ------------------------------------
913 // Not branched:
914 // ------------------------------------
915 not_branched:
916 }
917 // ------------------------------------
918 // 1. Bump the pc by 4
919 // ------------------------------------
920 self.update_pc(Rq::RAX as u8, not_branched_target);
921 self.end_branch(Some(not_branched_target));
922 }
923
924 fn bgeu(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
925 // Mark that a control flow instruction has been inserted.
926 self.control_flow_instruction_inserted = true;
927
928 // Add the base amount of cycles for the instruction.
929 self.bump_clk();
930
931 self.emit_risc_operand_load(rs1.into(), TEMP_A);
932 self.emit_risc_operand_load(rs2.into(), TEMP_B);
933
934 let branched_target = self.pc_current.wrapping_add(imm);
935 let not_branched_target = self.pc_current.wrapping_add(4);
936
937 dynasm! {
938 self;
939 .arch x64;
940
941 cmp Rq(TEMP_A), Rq(TEMP_B);
942 // If rs1 < rs2, jump to not_branched, since that would imply !(rs1 >= rs2)
943 jb >not_branched
944 }
945 // ------------------------------------
946 // Branched:
947 // 0. Bump the pc by the immediate.
948 // ------------------------------------
949 self.update_pc(Rq::RAX as u8, branched_target);
950 self.end_branch(Some(branched_target));
951
952 dynasm! {
953 self;
954 .arch x64;
955
956 // ------------------------------------
957 // Not branched:
958 // ------------------------------------
959 not_branched:
960 }
961 // ------------------------------------
962 // 1. Bump the pc by 4
963 // ------------------------------------
964 self.update_pc(Rq::RAX as u8, not_branched_target);
965 self.end_branch(Some(not_branched_target));
966 }
967
968 fn blt(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
969 // Mark that a control flow instruction has been inserted.
970 self.control_flow_instruction_inserted = true;
971
972 // Add the base amount of cycles for the instruction.
973 self.bump_clk();
974
975 self.emit_risc_operand_load(rs1.into(), TEMP_A);
976 self.emit_risc_operand_load(rs2.into(), TEMP_B);
977
978 let branched_target = self.pc_current.wrapping_add(imm);
979 let not_branched_target = self.pc_current.wrapping_add(4);
980
981 dynasm! {
982 self;
983 .arch x64;
984
985 // ------------------------------------
986 // Compare the two registers.
987 //
988 cmp Rq(TEMP_A), Rq(TEMP_B); // signed compare
989 jge >not_branched // rs1 ≥ rs2 → skip
990 }
991 // ------------------------------------
992 // Branched:
993 // 0. Bump the pc by the immediate.
994 // ------------------------------------
995 self.update_pc(Rq::RAX as u8, branched_target);
996 self.end_branch(Some(branched_target));
997
998 dynasm! {
999 self;
1000 .arch x64;
1001
1002 // ------------------------------------
1003 // Not branched:
1004 // ------------------------------------
1005 not_branched:
1006 }
1007 // ------------------------------------
1008 // 1. Bump the pc by 4
1009 // ------------------------------------
1010 self.update_pc(Rq::RAX as u8, not_branched_target);
1011 self.end_branch(Some(not_branched_target));
1012 }
1013
1014 fn bltu(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
1015 // Mark that a control flow instruction has been inserted.
1016 self.control_flow_instruction_inserted = true;
1017
1018 // Add the base amount of cycles for the instruction.
1019 self.bump_clk();
1020
1021 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1022 self.emit_risc_operand_load(rs2.into(), TEMP_B);
1023
1024 let branched_target = self.pc_current.wrapping_add(imm);
1025 let not_branched_target = self.pc_current.wrapping_add(4);
1026
1027 dynasm! {
1028 self;
1029 .arch x64;
1030 cmp Rq(TEMP_A), Rq(TEMP_B); // unsigned compare
1031 jae >not_branched // rs1 ≥ rs2 (unsigned) → skip
1032 }
1033 // ------------------------------------
1034 // Branched:
1035 // 0. Bump the pc by the immediate.
1036 // ------------------------------------
1037 self.update_pc(Rq::RAX as u8, branched_target);
1038 self.end_branch(Some(branched_target));
1039
1040 dynasm! {
1041 self;
1042 .arch x64;
1043
1044 // ------------------------------------
1045 // Not branched:
1046 // ------------------------------------
1047 not_branched:
1048 }
1049 // ------------------------------------
1050 // 1. Bump the pc by 4
1051 // ------------------------------------
1052 self.update_pc(Rq::RAX as u8, not_branched_target);
1053 self.end_branch(Some(not_branched_target));
1054 }
1055
1056 fn bne(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
1057 // Mark that a control flow instruction has been inserted.
1058 self.control_flow_instruction_inserted = true;
1059
1060 // Add the base amount of cycles for the instruction.
1061 self.bump_clk();
1062
1063 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1064 self.emit_risc_operand_load(rs2.into(), TEMP_B);
1065
1066 let branched_target = self.pc_current.wrapping_add(imm);
1067 let not_branched_target = self.pc_current.wrapping_add(4);
1068
1069 dynasm! {
1070 self;
1071 .arch x64;
1072 cmp Rq(TEMP_A), Rq(TEMP_B); // sets ZF
1073 je >not_branched // rs1 == rs2 → skip
1074 }
1075 // ------------------------------------
1076 // Branched:
1077 // 0. Bump the pc by the immediate.
1078 // ------------------------------------
1079 self.update_pc(Rq::RAX as u8, branched_target);
1080 self.end_branch(Some(branched_target));
1081
1082 dynasm! {
1083 self;
1084 .arch x64;
1085
1086 // ------------------------------------
1087 // Not branched:
1088 // ------------------------------------
1089 not_branched:
1090 }
1091 // ------------------------------------
1092 // 1. Bump the pc by 4
1093 // ------------------------------------
1094 self.update_pc(Rq::RAX as u8, not_branched_target);
1095 self.end_branch(Some(not_branched_target));
1096 }
1097}
1098
1099impl MemoryInstructions for TranspilerBackend {
1100 fn lb(&mut self, rd: RiscRegister, rs1: RiscRegister, imm: u64) {
1101 self.may_early_exit = true;
1102
1103 // ------------------------------------
1104 // Load in the base address and the phy sical memory pointer.
1105 // ------------------------------------
1106 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1107
1108 dynasm! {
1109 self;
1110 .arch x64;
1111
1112 // ------------------------------------
1113 // Add the immediate to the base address
1114 // Scaled to account for the entry size.
1115 //
1116 // TEMP_A = rs1 + imm = addr
1117 // ------------------------------------
1118 add Rq(TEMP_A), imm as i32;
1119
1120 // ------------------------------------
1121 // Store the intra-word offset.
1122 // ------------------------------------
1123 mov rax, Rq(TEMP_A);
1124 and rax, 7;
1125
1126 // ------------------------------------
1127 // Align to the start of the word.
1128 //
1129 // Scale to account for the entry size.
1130 // ------------------------------------
1131 and Rq(TEMP_A), -8;
1132 shl Rq(TEMP_A), 1;
1133
1134 // ------------------------------------
1135 // Add the risc32 byte offset to the physical memory pointer
1136 //
1137 // TEMP_A = addr + physical_memory_pointer
1138 // ------------------------------------
1139 add Rq(TEMP_A), Rq(MEMORY_PTR);
1140
1141 // ------------------------------------
1142 // 4. Load byte → sign-extend to 32 bits
1143 //
1144 // TEMP_B = clk
1145 // TEMP_A = addr + physical_memory_pointer
1146 // [addr + physical_memory_pointer] = clk
1147 // TEMP_A = [addr + physical_memory_pointer + 8]
1148 // ------------------------------------
1149 movsx Rq(TEMP_A), BYTE [Rq(TEMP_A) + 8 + rax]
1150 }
1151
1152 // 4. Write back to destination register
1153 self.emit_risc_register_store(TEMP_A, None, rd);
1154 }
1155
1156 fn lbu(&mut self, rd: RiscRegister, rs1: RiscRegister, imm: u64) {
1157 self.may_early_exit = true;
1158
1159 // ------------------------------------
1160 // Load in the base address
1161 // and the physical memory pointer.
1162 // ------------------------------------
1163 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1164
1165 dynasm! {
1166 self;
1167 .arch x64;
1168
1169 // ------------------------------------
1170 // Add the immediate to the base address
1171 //
1172 // TEMP_A = rs1 + imm = addr
1173 // ------------------------------------
1174 add Rq(TEMP_A), imm as i32;
1175
1176 // ------------------------------------
1177 // Store the intra-word offset.
1178 // ------------------------------------
1179 mov rax, Rq(TEMP_A);
1180 and rax, 7;
1181
1182 // ------------------------------------
1183 // Align to the start of the word.
1184 //
1185 // Scale to account for the entry size.
1186 // ------------------------------------
1187 and Rq(TEMP_A), -8;
1188 shl Rq(TEMP_A), 1;
1189
1190 // ------------------------------------
1191 // Add the risc32 byte offset to the physical memory pointer
1192 //
1193 // TEMP_A = addr + physical_memory_pointer
1194 // ------------------------------------
1195 add Rq(TEMP_A), Rq(MEMORY_PTR);
1196
1197 // ------------------------------------
1198 // Load byte → zero-extend to 32 bits
1199 // ------------------------------------
1200 movzx Rq(TEMP_A), BYTE [Rq(TEMP_A) + 8 + rax]
1201 }
1202
1203 self.emit_risc_register_store(TEMP_A, None, rd);
1204 }
1205
1206 fn lh(&mut self, rd: RiscRegister, rs1: RiscRegister, imm: u64) {
1207 self.may_early_exit = true;
1208
1209 // ------------------------------------
1210 // Load in the base address
1211 // and the physical memory pointer.
1212 // ------------------------------------
1213 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1214
1215 dynasm! {
1216 self;
1217 .arch x64;
1218
1219 // ------------------------------------
1220 // Add the immediate to the base address
1221 //
1222 // TEMP_A = rs1 + imm = addr
1223 // ------------------------------------
1224 add Rq(TEMP_A), imm as i32;
1225
1226 // ------------------------------------
1227 // Store the intra-word offset.
1228 // ------------------------------------
1229 mov rax, Rq(TEMP_A);
1230 and rax, 7;
1231
1232 // ------------------------------------
1233 // Align to the start of the word.
1234 //
1235 // Scale to account for the entry size.
1236 // ------------------------------------
1237 and Rq(TEMP_A), -8;
1238 shl Rq(TEMP_A), 1;
1239
1240 // ------------------------------------
1241 // Add the risc32 byte offset to the physical memory pointer
1242 //
1243 // TEMP_A = addr + physical_memory_pointer
1244 // ------------------------------------
1245 add Rq(TEMP_A), Rq(MEMORY_PTR);
1246
1247 // ------------------------------------
1248 // Load half-word → sign-extend to 32 bits
1249 // ------------------------------------
1250 movsx Rq(TEMP_A), WORD [Rq(TEMP_A) + 8 + rax]
1251 }
1252
1253 self.emit_risc_register_store(TEMP_A, None, rd);
1254 }
1255
1256 fn lhu(&mut self, rd: RiscRegister, rs1: RiscRegister, imm: u64) {
1257 self.may_early_exit = true;
1258
1259 // ------------------------------------
1260 // Load in the base address
1261 // and the physical memory pointer.
1262 // ------------------------------------
1263 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1264
1265 dynasm! {
1266 self;
1267 .arch x64;
1268
1269 // ------------------------------------
1270 // Add the immediate to the base address
1271 //
1272 // TEMP_A = rs1 + imm = addr
1273 // ------------------------------------
1274 add Rq(TEMP_A), imm as i32;
1275
1276 // ------------------------------------
1277 // Store the intra-word offset.
1278 // ------------------------------------
1279 mov rax, Rq(TEMP_A);
1280 and rax, 7;
1281
1282 // ------------------------------------
1283 // Align to the start of the word.
1284 //
1285 // Scale to account for the entry size.
1286 // ------------------------------------
1287 and Rq(TEMP_A), -8;
1288 shl Rq(TEMP_A), 1;
1289
1290 // ------------------------------------
1291 // Add the risc32 byte offset to the physical memory pointer
1292 //
1293 // TEMP_A = addr + physical_memory_pointer
1294 // ------------------------------------
1295 add Rq(TEMP_A), Rq(MEMORY_PTR);
1296
1297 // ------------------------------------
1298 // Load 16 bits, zero-extend to 32 bits
1299 // ------------------------------------
1300 movzx Rq(TEMP_A), WORD [Rq(TEMP_A) + 8 + rax]
1301 }
1302
1303 self.emit_risc_register_store(TEMP_A, None, rd);
1304 }
1305
1306 fn lw(&mut self, rd: RiscRegister, rs1: RiscRegister, imm: u64) {
1307 self.may_early_exit = true;
1308
1309 // ------------------------------------
1310 // Load the base address into TEMP_A
1311 // and physical memory pointer into TEMP_B
1312 // ------------------------------------
1313 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1314
1315 dynasm! {
1316 self;
1317 .arch x64;
1318
1319 // ------------------------------------
1320 // Add the immediate to the base address
1321 //
1322 // TEMP_A = rs1 + imm = addr
1323 // ------------------------------------
1324 add Rq(TEMP_A), imm as i32;
1325
1326 // ------------------------------------
1327 // Store the intra-word offset.
1328 // ------------------------------------
1329 mov rax, Rq(TEMP_A);
1330 and rax, 7;
1331
1332 // ------------------------------------
1333 // Align to the start of the word.
1334 //
1335 // Scale to account for the entry size.
1336 // ------------------------------------
1337 and Rq(TEMP_A), -8;
1338 shl Rq(TEMP_A), 1;
1339
1340 // ------------------------------------
1341 // 3. Add the risc32 byte offset to the physical memory pointer
1342 //
1343 // TEMP_A = addr + physical_memory_pointer
1344 // ------------------------------------
1345 add Rq(TEMP_A), Rq(MEMORY_PTR);
1346
1347 // ------------------------------------
1348 // 4. Load the word from physical memory into TEMP_A (sign-extended to 64-bit)
1349 // ------------------------------------
1350 movsxd Rq(TEMP_A), DWORD [Rq(TEMP_A) + 8 + rax]
1351 }
1352
1353 // ------------------------------------
1354 // 5. Store the result in the destination register.
1355 // ------------------------------------
1356 self.emit_risc_register_store(TEMP_A, None, rd);
1357 }
1358
1359 fn lwu(&mut self, rd: RiscRegister, rs1: RiscRegister, imm: u64) {
1360 self.may_early_exit = true;
1361
1362 // ------------------------------------
1363 // Load the base address into TEMP_A
1364 // and physical memory pointer into TEMP_B
1365 // ------------------------------------
1366 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1367
1368 dynasm! {
1369 self;
1370 .arch x64;
1371
1372 // ------------------------------------
1373 // Add the immediate to the base address
1374 //
1375 // TEMP_A = rs1 + imm = addr
1376 // ------------------------------------
1377 add Rq(TEMP_A), imm as i32;
1378
1379 // ------------------------------------
1380 // Store the intra-word offset.
1381 // ------------------------------------
1382 mov rax, Rq(TEMP_A);
1383 and rax, 7;
1384
1385 // ------------------------------------
1386 // Align to the start of the word.
1387 //
1388 // Scale to account for the entry size.
1389 // ------------------------------------
1390 and Rq(TEMP_A), -8;
1391 shl Rq(TEMP_A), 1;
1392
1393 // ------------------------------------
1394 // 3. Add the risc32 byte offset to the physical memory pointer
1395 //
1396 // TEMP_A = addr + physical_memory_pointer
1397 // ------------------------------------
1398 add Rq(TEMP_A), Rq(MEMORY_PTR);
1399
1400 // ------------------------------------
1401 // 4. Load the word from physical memory into TEMP_B (zero-extended to 64-bit)
1402 // ------------------------------------
1403 mov Rd(TEMP_A), DWORD [Rq(TEMP_A) + 8 + rax]
1404 }
1405
1406 // ------------------------------------
1407 // 5. Store the result in the destination register.
1408 // ------------------------------------
1409 self.emit_risc_register_store(TEMP_A, None, rd);
1410 }
1411
1412 fn ld(&mut self, rd: RiscRegister, rs1: RiscRegister, imm: u64) {
1413 self.may_early_exit = true;
1414
1415 // ------------------------------------
1416 // 1. Load the base address into TEMP_A
1417 // and physical memory pointer into TEMP_B
1418 // ------------------------------------
1419 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1420
1421 dynasm! {
1422 self;
1423 .arch x64;
1424
1425 // ------------------------------------
1426 // Add the immediate to the base address
1427 //
1428 // TEMP_A = rs1 + imm = addr
1429 // ------------------------------------
1430 add Rq(TEMP_A), imm as i32;
1431
1432 // ------------------------------------
1433 // Scale to account for the entry size.
1434 //
1435 // Assume the addr is properly aligned.
1436 // ------------------------------------
1437 shl Rq(TEMP_A), 1;
1438
1439 // ------------------------------------
1440 // Add the risc byte offset to the physical memory pointer
1441 //
1442 // TEMP_A = addr + physical_memory_pointer
1443 // ------------------------------------
1444 add Rq(TEMP_A), Rq(MEMORY_PTR);
1445
1446 // ------------------------------------
1447 // Load the word from physical memory into TEMP_A
1448 // ------------------------------------
1449 mov Rq(TEMP_A), QWORD [Rq(TEMP_A) + 8]
1450 }
1451
1452 // ------------------------------------
1453 // Store the result in the destination register.
1454 // ------------------------------------
1455 self.emit_risc_register_store(TEMP_A, None, rd);
1456 }
1457
1458 fn sb(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
1459 self.may_early_exit = true;
1460
1461 // ------------------------------------
1462 // Load the base address into TEMP_A
1463 // and physical memory pointer into TEMP_B
1464 // ------------------------------------
1465 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1466
1467 dynasm! {
1468 self;
1469 .arch x64;
1470
1471 // ------------------------------------
1472 // Add the immediate to the base address
1473 // ------------------------------------
1474 add Rq(TEMP_A), imm as i32;
1475
1476 // ------------------------------------
1477 // Store the intra-word offset.
1478 // ------------------------------------
1479 mov rax, Rq(TEMP_A);
1480 and rax, 7;
1481
1482 // ------------------------------------
1483 // Align to the start of the word.
1484 //
1485 // Scale to account for the entry size.
1486 // ------------------------------------
1487 and Rq(TEMP_A), -8;
1488 shl Rq(TEMP_A), 1;
1489
1490 // ------------------------------------
1491 // Add the risc32 byte offset to the physical memory pointer
1492 // ------------------------------------
1493 add Rq(TEMP_A), Rq(MEMORY_PTR)
1494 }
1495
1496 // ------------------------------------
1497 // Load the word from the RISC register into TEMP_B
1498 // ------------------------------------
1499 self.emit_risc_operand_load(rs2.into(), TEMP_B);
1500
1501 // ------------------------------------
1502 // Store the word into physical memory
1503 // ------------------------------------
1504 dynasm! {
1505 self;
1506 .arch x64;
1507
1508 mov BYTE [Rq(TEMP_A) + 8 + rax], Rb(TEMP_B)
1509 }
1510 }
1511
1512 fn sh(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
1513 self.may_early_exit = true;
1514
1515 // ------------------------------------
1516 // Load the base address into TEMP_A
1517 // and physical memory pointer into TEMP_B
1518 // ------------------------------------
1519 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1520
1521 dynasm! {
1522 self;
1523 .arch x64;
1524
1525 // ------------------------------------
1526 // Add the immediate to the base address
1527 // ------------------------------------
1528 add Rq(TEMP_A), imm as i32;
1529
1530 // ------------------------------------
1531 // Store the intra-word offset.
1532 // ------------------------------------
1533 mov rax, Rq(TEMP_A);
1534 and rax, 7;
1535
1536 // ------------------------------------
1537 // Align to the start of the word.
1538 // Scale to account for the entry size.
1539 // ------------------------------------
1540 and Rq(TEMP_A), -8;
1541 shl Rq(TEMP_A), 1;
1542
1543 // ------------------------------------
1544 // Add the risc32 byte offset to the physical memory pointer
1545 // ------------------------------------
1546 add Rq(TEMP_A), Rq(MEMORY_PTR)
1547 }
1548
1549 // ------------------------------------
1550 // Load the word from the RISC register into TEMP_B
1551 // ------------------------------------
1552 self.emit_risc_operand_load(rs2.into(), TEMP_B);
1553
1554 // ------------------------------------
1555 // Store the word into physical memory
1556 // ------------------------------------
1557 dynasm! {
1558 self;
1559 .arch x64;
1560
1561 mov WORD [Rq(TEMP_A) + 8 + rax], Rw(TEMP_B)
1562 }
1563 }
1564
1565 fn sw(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
1566 self.may_early_exit = true;
1567
1568 // ------------------------------------
1569 // Load the base address into TEMP_A
1570 // and physical memory pointer into TEMP_B
1571 // ------------------------------------
1572 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1573
1574 dynasm! {
1575 self;
1576 .arch x64;
1577
1578 // ------------------------------------
1579 // Add the immediate to the base address
1580 // ------------------------------------
1581 add Rq(TEMP_A), imm as i32;
1582
1583 // ------------------------------------
1584 // Store the intra-word offset.
1585 // ------------------------------------
1586 mov rax, Rq(TEMP_A);
1587 and rax, 7;
1588
1589 // ------------------------------------
1590 // Align to the start of the word.
1591 // Scale to account for the entry size.
1592 // ------------------------------------
1593 and Rq(TEMP_A), -8;
1594 shl Rq(TEMP_A), 1;
1595
1596 // ------------------------------------
1597 // Add the risc32 byte offset to the physical memory pointer
1598 // ------------------------------------
1599 add Rq(TEMP_A), Rq(MEMORY_PTR)
1600 }
1601
1602 // ------------------------------------
1603 // Load the word from the RISC register into TEMP_B
1604 // ------------------------------------
1605 self.emit_risc_operand_load(rs2.into(), TEMP_B);
1606
1607 // ------------------------------------
1608 // Store the word into physical memory
1609 // ------------------------------------
1610 dynasm! {
1611 self;
1612 .arch x64;
1613
1614 mov DWORD [Rq(TEMP_A) + 8 + rax], Rd(TEMP_B)
1615 }
1616 }
1617
1618 fn sd(&mut self, rs1: RiscRegister, rs2: RiscRegister, imm: u64) {
1619 self.may_early_exit = true;
1620
1621 // ------------------------------------
1622 // Load the base address into TEMP_A
1623 // and physical memory pointer into TEMP_B
1624 // ------------------------------------
1625 self.emit_risc_operand_load(rs1.into(), TEMP_A);
1626
1627 dynasm! {
1628 self;
1629 .arch x64;
1630
1631 // ------------------------------------
1632 // Add the immediate to the base address
1633 // ------------------------------------
1634 add Rq(TEMP_A), imm as i32;
1635
1636 // ------------------------------------
1637 // Scale to account for the entry size.
1638 //
1639 // Assume the addr is properly aligned.
1640 // ------------------------------------
1641 shl Rq(TEMP_A), 1;
1642
1643 // ------------------------------------
1644 // 3. Add the risc32 byte offset to the physical memory pointer
1645 // ------------------------------------
1646 add Rq(TEMP_A), Rq(MEMORY_PTR)
1647 }
1648
1649 // ------------------------------------
1650 // Load the word from the RISC register into TEMP_B
1651 // ------------------------------------
1652 self.emit_risc_operand_load(rs2.into(), TEMP_B);
1653
1654 // ------------------------------------
1655 // Store the word into physical memory
1656 // ------------------------------------
1657 dynasm! {
1658 self;
1659 .arch x64;
1660
1661 mov QWORD [Rq(TEMP_A) + 8], Rq(TEMP_B)
1662 }
1663 }
1664}
1665
1666impl SystemInstructions for TranspilerBackend {
1667 fn ecall(&mut self) {
1668 // Mark that a control flow instruction has been inserted.
1669 self.control_flow_instruction_inserted = true;
1670 self.may_early_exit = true;
1671
1672 // Load the JitContext pointer into the argument register.
1673 dynasm! {
1674 self;
1675 .arch x64;
1676 mov rdi, Rq(CONTEXT)
1677 };
1678
1679 // `sp1_ecall_handler` bumps PC for syscalls. So we just need
1680 // to set current PC.
1681 self.update_pc(TEMP_A, self.pc_current);
1682
1683 self.call_extern_fn_raw(self.ecall_handler as _);
1684
1685 // The ecall returns a u64 in RAX.
1686 self.emit_risc_register_store(Rq::RAX as u8, None, RiscRegister::X5);
1687
1688 // Add the base amount of cycles for the instruction.
1689 self.bump_clk();
1690
1691 self.end_branch(None);
1692 }
1693
1694 fn unimp(&mut self) {
1695 extern "C" fn unimp(ctx: *mut JitContext) {
1696 let ctx = unsafe { &mut *ctx };
1697 eprintln!("Unimplemented instruction at pc: {}", ctx.pc);
1698 // Trap via SIGILL so the parent maps this to a typed `Unimplemented`
1699 // cause instead of a generic abort.
1700 unsafe { libc::raise(libc::SIGILL) };
1701 }
1702
1703 self.update_pc(TEMP_A, self.pc_current);
1704 self.call_extern_fn(unimp);
1705 }
1706}