pub(crate) mod generated_code;
use crate::{ir::AtomicRmwOp, ir::types};
use generated_code::{AssemblerOutputs, Context, MInst, RegisterClass};

use super::external::{CraneliftRegisters, PairedGpr, PairedXmm, isle_assembler_methods};
use super::{MergeableLoadSize, is_int_or_ref_ty, is_mergeable_load, lower_to_amode};
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::*;
use crate::ir::types::*;
use crate::ir::{
    BlockCall, Inst, InstructionData, LibCall, MemFlags, Opcode, TrapCode, Value, ValueList,
};
use crate::isa::x64::X64Backend;
use crate::isa::x64::inst::{ReturnCallInfo, args::*, regs};
use crate::isa::x64::lower::{InsnInput, emit_vm_call};
use crate::machinst::isle::*;
use crate::machinst::{
    ArgPair, CallArgList, CallInfo, CallRetList, InstOutput, MachInst, VCodeConstant,
    VCodeConstantData,
};
use alloc::boxed::Box;
use alloc::vec::Vec;
use cranelift_assembler_x64 as asm;
use regalloc2::PReg;

type BoxCallInfo = Box<CallInfo<ExternalName>>;
type BoxCallIndInfo = Box<CallInfo<RegMem>>;
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
type BoxReturnCallIndInfo = Box<ReturnCallInfo<Reg>>;
type VecArgPair = Vec<ArgPair>;
type BoxSyntheticAmode = Box<SyntheticAmode>;

type AssemblerInst = asm::Inst<CraneliftRegisters>;

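/// A load instruction that can be sunk into (merged with) the instruction that
/// uses it, becoming a memory operand of that instruction.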
pub struct SinkableLoad {
    inst: Inst,
    addr_input: InsnInput,
    offset: i32,
}

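/// The main entry point for lowering with ISLE: lower a single CLIF
/// instruction to x64 machine instructions via the generated lowering rules,
/// returning its outputs on success.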
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower(&mut isle_ctx, inst)
}

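/// Lower a branch instruction, given the pre-resolved labels of its target
/// blocks.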
pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, targets)
}

impl Context for IsleContext<'_, '_, MInst, X64Backend> {
    isle_lower_prelude_methods!();
    isle_assembler_methods!();

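    // Direct and indirect calls must both account for the callee's
    // stack-argument and stack-return space up front, so the ABI
    // implementation can size the outgoing-argument area of the frame.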
    fn gen_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
        patchable: bool,
    ) -> BoxCallInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest, uses, defs, try_call_info, patchable),
        )
    }

    fn gen_call_ind_info(
        &mut self,
        sig: Sig,
        dest: &RegMem,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> BoxCallIndInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest.clone(), uses, defs, try_call_info, false),
        )
    }

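    // Tail calls instead accumulate the *incoming* tail-call argument area:
    // the caller's frame must be large enough to hold the new callee's stack
    // arguments before the frame is replaced.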
    fn gen_return_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
    ) -> BoxReturnCallInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    fn gen_return_call_ind_info(
        &mut self,
        sig: Sig,
        dest: Reg,
        uses: CallArgList,
    ) -> BoxReturnCallIndInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    #[inline]
    fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }

    #[inline]
    fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        OperandSize::from_ty(ty)
    }

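    // When a value is a small integer constant, fold it directly into the
    // instruction as a 32-bit immediate rather than materializing it in a
    // register or loading it from memory.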
    fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                };
            }
        }

        self.put_in_reg_mem(val).into()
    }

    fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return XmmMemImm::unwrap_new(RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                });
            }
        }

        let res = match self.put_in_xmm_mem(val).to_reg_mem() {
            RegMem::Reg { reg } => RegMemImm::Reg { reg },
            RegMem::Mem { addr } => RegMemImm::Mem { addr },
        };

        XmmMemImm::unwrap_new(res)
    }

    fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A constant operand to an XMM instruction can be referenced from
            // the constant pool instead of first moving it into a register.
            let vcode_constant = self.emit_u128_le_const(c as u128);
            return XmmMem::unwrap_new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)));
        }

        XmmMem::unwrap_new(self.put_in_reg_mem(val))
    }

    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // Likewise, integer constants can be referenced from the constant
            // pool.
            let vcode_constant = self.emit_u64_le_const(c);
            return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
        }

        if let Some(load) = self.sinkable_load(val) {
            return RegMem::Mem {
                addr: self.sink_load(&load),
            };
        }

        RegMem::reg(self.put_in_reg(val))
    }

    #[inline]
    fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn has_avx(&mut self) -> bool {
        self.backend.x64_flags.has_avx()
    }

    #[inline]
    fn use_avx2(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_avx2()
    }

    #[inline]
    fn has_avx512vl(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vl()
    }

    #[inline]
    fn has_avx512dq(&mut self) -> bool {
        self.backend.x64_flags.has_avx512dq()
    }

    #[inline]
    fn has_avx512f(&mut self) -> bool {
        self.backend.x64_flags.has_avx512f()
    }

    #[inline]
    fn has_avx512bitalg(&mut self) -> bool {
        self.backend.x64_flags.has_avx512bitalg()
    }

    #[inline]
    fn has_avx512vbmi(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vbmi()
    }

    #[inline]
    fn has_lzcnt(&mut self) -> bool {
        self.backend.x64_flags.has_lzcnt()
    }

    #[inline]
    fn has_bmi1(&mut self) -> bool {
        self.backend.x64_flags.has_bmi1()
    }

    #[inline]
    fn has_bmi2(&mut self) -> bool {
        self.backend.x64_flags.has_bmi2()
    }

    #[inline]
    fn use_popcnt(&mut self) -> bool {
        self.backend.x64_flags.has_popcnt() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn use_fma(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_fma()
    }

    #[inline]
    fn has_sse3(&mut self) -> bool {
        self.backend.x64_flags.has_sse3()
    }

    #[inline]
    fn has_ssse3(&mut self) -> bool {
        self.backend.x64_flags.has_ssse3()
    }

    #[inline]
    fn has_sse41(&mut self) -> bool {
        self.backend.x64_flags.has_sse41()
    }

    #[inline]
    fn use_sse42(&mut self) -> bool {
        self.backend.x64_flags.has_sse41() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn has_cmpxchg16b(&mut self) -> bool {
        self.backend.x64_flags.has_cmpxchg16b()
    }

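    // x86 shift instructions implicitly mask the shift amount to the operand
    // width (a 32-bit shift uses only the low five bits of the count);
    // `shift_mask` computes that mask for a lane type, e.g. 31 for I32 lanes
    // and 63 for I64 lanes.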
    #[inline]
    fn shift_mask(&mut self, ty: Type) -> u8 {
        debug_assert!(ty.lane_bits().is_power_of_two());

        (ty.lane_bits() - 1) as u8
    }

    fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u8 {
        (val.bits() as u8) & self.shift_mask(ty)
    }

    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
        let imm = self.i64_from_iconst(val)?;
        Some(GprMemImm::unwrap_new(RegMemImm::Imm {
            simm32: i32::try_from(imm).ok()?.cast_unsigned(),
        }))
    }

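    // A load can be sunk into its user only when `is_mergeable_load` approves:
    // among other conditions, the load must have a single use and no
    // side-effecting instruction may need to be reordered across it.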
    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        SyntheticAmode::Real(addr)
    }

    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }

    fn emit(&mut self, inst: &MInst) -> Unit {
        self.lower_ctx.emit(inst.clone());
    }

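    // The INSERTPS immediate byte is laid out as `count_s:2 | count_d:2 |
    // zmask:4`: the source lane in bits 6-7, the destination lane in bits 4-5,
    // and a zero-mask in the low four bits. Here the source lane and zero-mask
    // are left at zero.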
    #[inline]
    fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
        0b00_00_00_00 | lane << 4
    }

    #[inline]
    fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
        RegMem::mem(addr.clone())
    }

    #[inline]
    fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
        amode.clone().into()
    }

    #[inline]
    fn synthetic_amode_slot(&mut self, offset: i32) -> SyntheticAmode {
        SyntheticAmode::SlotOffset { simm32: offset }
    }

    #[inline]
    fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
        SyntheticAmode::ConstantOffset(c)
    }

    #[inline]
    fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
        r.to_writable_reg()
    }

    #[inline]
    fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
        r.to_writable_reg()
    }

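    // There is no byte-granularity shift in SSE/AVX, so i8x16 shifts are
    // lowered as a wider (16-bit) shift followed by a mask that clears the
    // bits dragged across each byte's lane boundary. The tables at the bottom
    // of this file hold one 16-byte mask per shift amount in 0..8.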
    fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    #[inline]
    fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
        Writable::from_reg(Xmm::unwrap_new(r.to_reg()))
    }

    #[inline]
    fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
        r.to_reg()
    }

    #[inline]
    fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
        r.to_reg()
    }

    #[inline]
    fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
        XmmMemImm::unwrap_new(r.clone().to_reg_mem().into())
    }

    #[inline]
    fn temp_writable_gpr(&mut self) -> WritableGpr {
        self.lower_ctx.temp_writable_gpr()
    }

    #[inline]
    fn temp_writable_xmm(&mut self) -> WritableXmm {
        self.lower_ctx.temp_writable_xmm()
    }

    #[inline]
    fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
        RegMemImm::Reg { reg }
    }

    #[inline]
    fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
        XmmMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
        GprMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
        XmmMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
        xm.clone().into()
    }

    #[inline]
    fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
        gm.clone().into()
    }

    #[inline]
    fn xmm_new(&mut self, r: Reg) -> Xmm {
        Xmm::unwrap_new(r)
    }

    #[inline]
    fn gpr_new(&mut self, r: Reg) -> Gpr {
        Gpr::unwrap_new(r)
    }

    #[inline]
    fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
        GprMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
        GprMem::unwrap_new(RegMem::reg(r))
    }

    #[inline]
    fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
        GprMem::from(gpr)
    }

    #[inline]
    fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
        GprMemImm::from(gpr)
    }

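    // Integer and reference types (including I128, which needs a register
    // pair) live in GPRs; floats and 128-bit-or-narrower vectors live in XMM
    // registers.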
    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
        } else if ty.is_float() || (ty.is_vector() && ty.bits() <= 128) {
            Some(RegisterClass::Xmm)
        } else {
            None
        }
    }

    #[inline]
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 => Some(()),
            _ => None,
        }
    }

    #[inline]
    fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
        CC::from_intcc(*intcc)
    }

    #[inline]
    fn cc_invert(&mut self, cc: &CC) -> CC {
        cc.invert()
    }

    #[inline]
    fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
        match cc {
            CC::Z => Some(*cc),
            CC::NZ => Some(*cc),
            _ => None,
        }
    }

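    // Check whether `offset + zero_extend(constant_value)` fits in the signed
    // 32-bit displacement of an x64 addressing mode. The shift pair
    // zero-extends the constant from its original width; the final comparison
    // is a sign-extension round-trip through 32 bits.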
    #[inline]
    fn sum_extend_fits_in_32_bits(
        &mut self,
        extend_from_ty: Type,
        constant_value: Imm64,
        offset: Offset32,
    ) -> Option<u32> {
        let offset: i64 = offset.into();
        let constant_value: u64 = constant_value.bits() as u64;
        let shift = 64 - extend_from_ty.bits();
        let zero_extended_constant_value = (constant_value << shift) >> shift;
        let sum = offset.wrapping_add(zero_extended_constant_value as i64);
        if sum == ((sum << 32) >> 32) {
            Some(sum as u32)
        } else {
            None
        }
    }

    #[inline]
    fn amode_offset(&mut self, addr: &SyntheticAmode, offset: i32) -> SyntheticAmode {
        addr.offset(offset)
    }

    #[inline]
    fn zero_offset(&mut self) -> Offset32 {
        Offset32::new(0)
    }

    #[inline]
    fn preg_rbp(&mut self) -> PReg {
        regs::rbp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_rsp(&mut self) -> PReg {
        regs::rsp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_pinned(&mut self) -> PReg {
        regs::pinned_reg().to_real_reg().unwrap().into()
    }

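    // Emit a call to a runtime library routine for operations with no direct
    // x64 lowering (e.g. floating-point rounding before SSE4.1). Each helper
    // takes the callee and its register arguments and returns the single
    // result register.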
    fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_2(&mut self, libcall: &LibCall, a: Reg, b: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b), ValueRegs::one(c)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    #[inline]
    fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
        let const_data = self.lower_ctx.get_constant_data(constant);
        if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
            return Some(());
        }
        None
    }

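    // Build PSHUFB control masks for a 32-byte shuffle split across two
    // 16-byte inputs. PSHUFB selects a byte per lane from the low four bits of
    // each control byte and writes zero when the control byte's top bit (0x80)
    // is set, so out-of-range indices are mapped to 0x80.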
    #[inline]
    fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| b.wrapping_sub(16))
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn perm_from_mask_with_zeros(
        &mut self,
        mask: &VecMask,
    ) -> Option<(VCodeConstant, VCodeConstant)> {
        if !mask.iter().any(|&b| b > 31) {
            return None;
        }

        let zeros = mask
            .iter()
            .map(|&b| if b > 31 { 0x00 } else { 0xff })
            .collect();

        Some((
            self.perm_from_mask(mask),
            self.lower_ctx
                .use_constant(VCodeConstantData::Generated(zeros)),
        ))
    }

    #[inline]
    fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask.iter().cloned().collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
        match XmmMemAlignedImm::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

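    // The following matchers try to express a general 32-bit-lane or
    // 16-bit-lane shuffle as the immediate byte of a single instruction
    // (PSHUFD, SHUFPS, PSHUFLW, PSHUFHW, PBLENDW), returning `None` when the
    // requested shuffle does not fit that instruction's pattern.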
    fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        // All lanes must come from the second operand (indices 4..8), so
        // re-index into that operand before encoding the immediate.
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
        // SHUFPS selects its low two lanes from the first operand and its high
        // two lanes from the second, so `c` and `d` must index into the second
        // operand.
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
        // The reversed form: low lanes from the second operand, high lanes
        // from the first.
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // PSHUFLW shuffles the low four 16-bit lanes and passes the high four
        // through unchanged, so `e..h` must be the identity.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Same pattern, but with every lane drawn from the second operand
        // (indices 8..16), so re-index by subtracting 8 first.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(8)?;
        let f = f.checked_sub(8)?;
        let g = g.checked_sub(8)?;
        let h = h.checked_sub(8)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // PSHUFHW is the mirror image: the low four lanes pass through
        // unchanged and the high four are shuffled, with the shuffled indices
        // encoded relative to lane 4.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let e = e.checked_sub(4)?;
        let f = f.checked_sub(4)?;
        let g = g.checked_sub(4)?;
        let h = h.checked_sub(4)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // As above, but drawing from the second operand: the pass-through
        // lanes are 8..12 and the shuffled lanes are 12..16.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(12)?;
        let f = f.checked_sub(12)?;
        let g = g.checked_sub(12)?;
        let h = h.checked_sub(12)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();

        // A shuffle that selects consecutive bytes is a byte-rotation of the
        // concatenated inputs, which PALIGNR expresses as the starting index.
        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
            Some(bytes[0])
        } else {
            None
        }
    }

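    // PBLENDW picks each 16-bit lane from one of its two operands according to
    // an 8-bit mask: bit `i` clear takes lane `i` from the first operand, bit
    // `i` set takes it from the second. A shuffle qualifies only when lane `i`
    // selects index `i` or `i + 8`.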
    fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;

        let bit = |x: u8, c: u8| {
            if x % 8 == c {
                if x < 8 { Some(0) } else { Some(1 << c) }
            } else {
                None
            }
        };
        Some(
            bit(a, 0)?
                | bit(b, 1)?
                | bit(c, 2)?
                | bit(d, 3)?
                | bit(e, 4)?
                | bit(f, 5)?
                | bit(g, 6)?
                | bit(h, 7)?,
        )
    }

    fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
        XmmMemImm::unwrap_new(RegMemImm::imm(imm))
    }

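    // An all-ones 128-bit constant with a single zero-byte "hole" at the given
    // lane, used when blending a new value into one lane of an i8x16 vector.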
    fn insert_i8x16_lane_hole(&mut self, hole_idx: u8) -> VCodeConstant {
        let mask = -1i128 as u128;
        self.emit_u128_le_const(mask ^ (0xff << (hole_idx * 8)))
    }

    fn writable_invalid_gpr(&mut self) -> WritableGpr {
        let reg = Gpr::new(self.invalid_reg()).unwrap();
        WritableGpr::from_reg(reg)
    }

    fn box_synthetic_amode(&mut self, amode: &SyntheticAmode) -> BoxSyntheticAmode {
        Box::new(amode.clone())
    }

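    // Immediate-pattern helpers: each succeeds only if the operand is an
    // immediate that fits in the requested width; the `imm` variants return
    // the raw bits and the `simm` variants the signed value.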
    fn is_imm8(&mut self, src: &GprMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_imm8_xmm(&mut self, src: &XmmMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm8(&mut self, src: &GprMemImm) -> Option<i8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i8::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm16(&mut self, src: &GprMemImm) -> Option<u16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i16::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm16(&mut self, src: &GprMemImm) -> Option<i16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i16::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm32(&mut self, src: &GprMemImm) -> Option<u32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32),
            _ => None,
        }
    }

    fn is_simm32(&mut self, src: &GprMemImm) -> Option<i32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32.cast_signed()),
            _ => None,
        }
    }

    fn is_gpr(&mut self, src: &GprMemImm) -> Option<Gpr> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => Gpr::new(reg),
            _ => None,
        }
    }

    fn is_xmm(&mut self, src: &XmmMem) -> Option<Xmm> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { reg } => Xmm::new(reg),
            _ => None,
        }
    }

    fn is_gpr_mem(&mut self, src: &GprMemImm) -> Option<GprMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => GprMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => GprMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_xmm_mem(&mut self, src: &XmmMemImm) -> Option<XmmMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => XmmMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => XmmMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_mem(&mut self, src: &XmmMem) -> Option<SyntheticAmode> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { .. } => None,
            RegMem::Mem { addr } => Some(addr),
        }
    }

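    // BMI2 MULX writes its widened product to two destination registers; when
    // both destinations name the same register, only the high half is kept.
    // These helpers exploit that to compute just the upper half of a widening
    // multiply, without clobbering FLAGS.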
    fn x64_mulxl_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxl_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }

    fn x64_mulxq_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxq_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }

    fn bt_imm(&mut self, val: u64) -> Option<u8> {
        if val.count_ones() == 1 {
            Some(u8::try_from(val.trailing_zeros()).unwrap())
        } else {
            None
        }
    }
}

impl IsleContext<'_, '_, MInst, X64Backend> {
    fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
        let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        self.lower_ctx.emit(MInst::External {
            inst: asm::inst::movdqu_a::new(
                Writable::from_reg(Xmm::unwrap_new(tmp.to_reg())),
                asm::XmmMem::Mem(addr.into()),
            )
            .into(),
        });
        Xmm::unwrap_new(tmp.to_reg())
    }

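    // The external assembler models a read-write operand as a pair of virtual
    // registers: the register read and the register written. Pairing them
    // here keeps both visible to register allocation as a distinct use and
    // def.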
    fn convert_gpr_to_assembler_read_write_gpr(&mut self, read: Gpr) -> asm::Gpr<PairedGpr> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Gpr::new(PairedGpr { read, write })
    }

    fn convert_gpr_to_assembler_fixed_read_write_gpr<const E: u8>(
        &mut self,
        read: Gpr,
    ) -> asm::Fixed<PairedGpr, E> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Fixed(PairedGpr { read, write })
    }

    fn convert_xmm_to_assembler_read_write_xmm(&mut self, read: Xmm) -> asm::Xmm<PairedXmm> {
        let write = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        let write = WritableXmm::from_writable_reg(write).unwrap();
        asm::Xmm::new(PairedXmm { read, write })
    }

    fn convert_gpr_mem_to_assembler_read_gpr_mem(&self, read: &GprMem) -> asm::GprMem<Gpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    fn convert_xmm_mem_to_assembler_read_xmm_mem_aligned(
        &self,
        read: &XmmMemAligned,
    ) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    fn convert_xmm_mem_to_assembler_read_xmm_mem(&self, read: &XmmMem) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    fn convert_xmm_mem_to_assembler_write_xmm_mem(
        &self,
        write: &XmmMem,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    fn convert_xmm_mem_to_assembler_write_xmm_mem_aligned(
        &self,
        write: &XmmMemAligned,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    fn convert_gpr_mem_to_assembler_read_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<PairedGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(
                *self
                    .convert_gpr_to_assembler_read_write_gpr(Gpr::new(reg).unwrap())
                    .as_ref(),
            ),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    fn convert_gpr_mem_to_assembler_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<WritableGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(WritableGpr::from_reg(Gpr::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    fn convert_amode_to_assembler_amode(&mut self, amode: &SyntheticAmode) -> asm::Amode<Gpr> {
        amode.clone().into()
    }
}

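// Masks for the i8x16 shift lowerings above. Row `i` (bytes 16*i .. 16*i + 16)
// is the mask for shift amount `i`: shifting each byte left by `i` keeps
// exactly the bits `0xff << i`, so e.g. row 1 is all `0xfe`.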
1222#[rustfmt::skip] const I8X16_ISHL_MASKS: [u8; 128] = [
1231 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1232 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
1233 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
1234 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
1235 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
1236 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
1237 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
1238 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
1239];
1240
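// The analogous masks for logical right shifts: row `i` keeps the bits
// `0xff >> i` in each byte.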
1241#[rustfmt::skip] const I8X16_USHR_MASKS: [u8; 128] = [
1243 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1244 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
1245 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
1246 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
1247 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
1248 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
1249 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
1250 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
1251];
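
// A small consistency check for the two tables (this test module is a local
// addition, not part of the lowering logic): every 16-byte row must equal the
// byte-wise shift mask for its row index.
#[cfg(test)]
mod shift_mask_tests {
    use super::{I8X16_ISHL_MASKS, I8X16_USHR_MASKS};

    #[test]
    fn rows_match_shift_amounts() {
        for amt in 0..8usize {
            // Shifting a byte left by `amt` keeps the bits `0xff << amt`...
            let ishl = &I8X16_ISHL_MASKS[amt * 16..amt * 16 + 16];
            assert!(ishl.iter().all(|&b| b == 0xffu8 << amt));
            // ...and shifting right keeps the bits `0xff >> amt`.
            let ushr = &I8X16_USHR_MASKS[amt * 16..amt * 16 + 16];
            assert!(ushr.iter().all(|&b| b == 0xffu8 >> amt));
        }
    }
}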