use super::super::instructions::{Operand, PtxInstruction, PtxOp, RoundingMode};
use super::super::registers::VirtualReg;
use super::super::types::PtxType;
use super::comparison::PtxComparison;
use super::control::PtxControl;
use super::KernelBuilder;
// Conversion, move and reciprocal emitters.
//
// Each method appends one or more instructions to `self.instructions`.
// Methods that return a `VirtualReg` hand back the register holding the
// result; unless noted otherwise that register is freshly obtained from
// `self.registers.allocate_virtual`.
impl<'a> KernelBuilder<'a> {
    /// Emits a `Cvt` typed `U64` from `val` into a newly allocated `U64`
    /// register and returns that register.
    ///
    /// Per the method name, `val` is expected to be a u32 register.
    pub fn cvt_u64_u32(&mut self, val: VirtualReg) -> VirtualReg {
        let widened = self.registers.allocate_virtual(PtxType::U64);
        // Same instruction as the `_into` variant, just with a fresh target.
        self.cvt_u64_u32_into(widened, val);
        widened
    }

    /// Emits a `Cvt` typed `U64` from `val` into the caller-supplied `dst`.
    ///
    /// No register is allocated; `dst` is used as given.
    pub fn cvt_u64_u32_into(&mut self, dst: VirtualReg, val: VirtualReg) {
        let widen = PtxInstruction::new(PtxOp::Cvt, PtxType::U64)
            .dst(Operand::Reg(dst))
            .src(Operand::Reg(val));
        self.instructions.push(widen);
    }

    /// Emits a `Cvt` typed `U32` from `val` into a newly allocated `U32`
    /// register and returns that register.
    ///
    /// Per the method name, `val` is expected to be a u64 register.
    pub fn cvt_u32_u64(&mut self, val: VirtualReg) -> VirtualReg {
        let narrowed = self.registers.allocate_virtual(PtxType::U32);
        let narrow = PtxInstruction::new(PtxOp::Cvt, PtxType::U32)
            .dst(Operand::Reg(narrowed))
            .src(Operand::Reg(val));
        self.instructions.push(narrow);
        narrowed
    }

    /// Emits a `Cvt` typed `F32` with rounding mode `Rn` from `val` into a
    /// newly allocated `F32` register and returns that register.
    ///
    /// No explicit source type is attached to the instruction here.
    pub fn cvt_f32_u32(&mut self, val: VirtualReg) -> VirtualReg {
        let as_float = self.registers.allocate_virtual(PtxType::F32);
        let to_float = PtxInstruction::new(PtxOp::Cvt, PtxType::F32)
            .dst(Operand::Reg(as_float))
            .src(Operand::Reg(val))
            .rounding(RoundingMode::Rn);
        self.instructions.push(to_float);
        as_float
    }

    /// Builds an s32 value from a byte using a compare/select/subtract
    /// sequence and returns the register produced by the final subtraction.
    ///
    /// Steps, in emission order: widen `val` with `cvt_u32_u8`, test the
    /// widened value against 128 with `setp_ge_u32`, select 256 or 0 with
    /// `selp_u32`, then subtract the selected amount with `sub_u32_reg`.
    ///
    /// Assuming those helpers behave as their names suggest, a byte `b`
    /// becomes `b - 256` when `b >= 128` and stays `b` otherwise, i.e. the
    /// byte is reinterpreted as a signed 8-bit value.
    pub fn cvt_s32_s8(&mut self, val: VirtualReg) -> VirtualReg {
        let widened = self.cvt_u32_u8(val);

        // Bytes at or above 128 are the ones that need the correction.
        let threshold = self.mov_u32_imm(128);
        let needs_fixup = self.setp_ge_u32(widened, threshold);

        let wrap = self.mov_u32_imm(256);
        let no_wrap = self.mov_u32_imm(0);
        let correction = self.selp_u32(needs_fixup, wrap, no_wrap);

        self.sub_u32_reg(widened, correction)
    }

    /// Emits a `Cvt` typed `F32` with an explicit `S32` source type and
    /// rounding mode `Rn`, from `val` into a newly allocated `F32` register,
    /// and returns that register.
    pub fn cvt_f32_s32(&mut self, val: VirtualReg) -> VirtualReg {
        let as_float = self.registers.allocate_virtual(PtxType::F32);
        let to_float = PtxInstruction::new(PtxOp::Cvt, PtxType::F32)
            .dst(Operand::Reg(as_float))
            .src(Operand::Reg(val))
            .with_src_type(PtxType::S32)
            .rounding(RoundingMode::Rn);
        self.instructions.push(to_float);
        as_float
    }

    /// Emits an `F32`-to-`F32` `Cvt` with rounding mode `Rmi` from `val` into
    /// a newly allocated `F32` register and returns that register.
    ///
    /// Assuming `RoundingMode::Rmi` maps to PTX `.rmi` (round to integer
    /// toward negative infinity), the result is the floor of `val`.
    pub fn floor_f32(&mut self, val: VirtualReg) -> VirtualReg {
        let floored = self.registers.allocate_virtual(PtxType::F32);
        let round_down = PtxInstruction::new(PtxOp::Cvt, PtxType::F32)
            .with_src_type(PtxType::F32)
            .rounding(RoundingMode::Rmi)
            .dst(Operand::Reg(floored))
            .src(Operand::Reg(val));
        self.instructions.push(round_down);
        floored
    }

    /// Emits a `Cvt` typed `U32` from `val` into a newly allocated `U32`
    /// register and returns that register.
    ///
    /// Per the method name, `val` is expected to be a u8 register.
    pub fn cvt_u32_u8(&mut self, val: VirtualReg) -> VirtualReg {
        let widened = self.registers.allocate_virtual(PtxType::U32);
        let widen = PtxInstruction::new(PtxOp::Cvt, PtxType::U32)
            .dst(Operand::Reg(widened))
            .src(Operand::Reg(val));
        self.instructions.push(widen);
        widened
    }

    /// Emits a `Cvt` typed `U32` from `val` into a newly allocated `U32`
    /// register and returns that register.
    ///
    /// Per the method name, `val` is expected to be a u16 register.
    pub fn cvt_u32_u16(&mut self, val: VirtualReg) -> VirtualReg {
        let widened = self.registers.allocate_virtual(PtxType::U32);
        let widen = PtxInstruction::new(PtxOp::Cvt, PtxType::U32)
            .dst(Operand::Reg(widened))
            .src(Operand::Reg(val));
        self.instructions.push(widen);
        widened
    }

    /// Emits a `Cvt` typed `U16` from `val` into a newly allocated `U16`
    /// register and returns that register.
    ///
    /// Per the method name, `val` is expected to be a u32 register.
    pub fn cvt_u16_u32(&mut self, val: VirtualReg) -> VirtualReg {
        let narrowed = self.registers.allocate_virtual(PtxType::U16);
        let narrow = PtxInstruction::new(PtxOp::Cvt, PtxType::U16)
            .dst(Operand::Reg(narrowed))
            .src(Operand::Reg(val));
        self.instructions.push(narrow);
        narrowed
    }

    /// Emits a `Cvt` typed `S32` with rounding mode `Rni` from `val` into a
    /// newly allocated `S32` register and returns that register.
    ///
    /// Per the method name, `val` is expected to be an f32 register.
    pub fn cvt_rni_s32_f32(&mut self, val: VirtualReg) -> VirtualReg {
        let as_int = self.registers.allocate_virtual(PtxType::S32);
        let to_int = PtxInstruction::new(PtxOp::Cvt, PtxType::S32)
            .dst(Operand::Reg(as_int))
            .src(Operand::Reg(val))
            .rounding(RoundingMode::Rni);
        self.instructions.push(to_int);
        as_int
    }

    /// Emits a `Mov` typed `S32` that loads the immediate `val` into a newly
    /// allocated `S32` register and returns that register.
    ///
    /// The immediate is widened losslessly to `i64` for `Operand::ImmI64`.
    pub fn mov_s32_imm(&mut self, val: i32) -> VirtualReg {
        let target = self.registers.allocate_virtual(PtxType::S32);
        let immediate = Operand::ImmI64(i64::from(val));
        let load = PtxInstruction::new(PtxOp::Mov, PtxType::S32)
            .dst(Operand::Reg(target))
            .src(immediate);
        self.instructions.push(load);
        target
    }

    /// Emits a `Mov` typed `S32` from `val` into a newly allocated `S32`
    /// register and returns that register.
    ///
    /// This is a `Mov`, not a `Cvt`: per the method name it retypes a u32
    /// register as s32.
    pub fn mov_s32_from_u32(&mut self, val: VirtualReg) -> VirtualReg {
        let retyped = self.registers.allocate_virtual(PtxType::S32);
        let copy = PtxInstruction::new(PtxOp::Mov, PtxType::S32)
            .dst(Operand::Reg(retyped))
            .src(Operand::Reg(val));
        self.instructions.push(copy);
        retyped
    }

    /// Emits a `Cvt` typed `U8` from `val` into a newly allocated `U8`
    /// register and returns that register.
    ///
    /// Per the method name, `val` is expected to be an s32 register.
    pub fn cvt_u8_s32(&mut self, val: VirtualReg) -> VirtualReg {
        let narrowed = self.registers.allocate_virtual(PtxType::U8);
        let narrow = PtxInstruction::new(PtxOp::Cvt, PtxType::U8)
            .dst(Operand::Reg(narrowed))
            .src(Operand::Reg(val));
        self.instructions.push(narrow);
        narrowed
    }

    /// Emits a `Cvt` typed `S32` from `val` into a newly allocated `S32`
    /// register and returns that register.
    ///
    /// NOTE(review): the `_sx` suffix suggests sign extension, but no source
    /// type is set on the instruction here. Whether the emitted conversion
    /// actually sign-extends depends on how the emitter derives the source
    /// type, so confirm against the emitter and callers.
    pub fn cvt_s32_u8_sx(&mut self, val: VirtualReg) -> VirtualReg {
        let extended = self.registers.allocate_virtual(PtxType::S32);
        let extend = PtxInstruction::new(PtxOp::Cvt, PtxType::S32)
            .dst(Operand::Reg(extended))
            .src(Operand::Reg(val));
        self.instructions.push(extend);
        extended
    }

    /// Emits a `Cvt` typed `S32` from `val` into a newly allocated `S32`
    /// register and returns that register.
    ///
    /// Per the method name, `val` is expected to be a u32 register.
    pub fn cvt_s32_u32(&mut self, val: VirtualReg) -> VirtualReg {
        let as_signed = self.registers.allocate_virtual(PtxType::S32);
        let convert = PtxInstruction::new(PtxOp::Cvt, PtxType::S32)
            .dst(Operand::Reg(as_signed))
            .src(Operand::Reg(val));
        self.instructions.push(convert);
        as_signed
    }

    /// Emits an `Rcp` typed `F32` from `val` into a newly allocated `F32`
    /// register and returns that register.
    ///
    /// No rounding mode is attached to the instruction here.
    pub fn rcp_f32(&mut self, val: VirtualReg) -> VirtualReg {
        let reciprocal = self.registers.allocate_virtual(PtxType::F32);
        let rcp = PtxInstruction::new(PtxOp::Rcp, PtxType::F32)
            .dst(Operand::Reg(reciprocal))
            .src(Operand::Reg(val));
        self.instructions.push(rcp);
        reciprocal
    }

    /// Emits a `Mov` typed `U32` that loads the immediate `val` into the
    /// caller-supplied `dst`.
    ///
    /// No register is allocated. The immediate is widened losslessly to
    /// `u64` for `Operand::ImmU64`.
    pub fn mov_u32_inplace(&mut self, dst: VirtualReg, val: u32) {
        let immediate = Operand::ImmU64(u64::from(val));
        let load = PtxInstruction::new(PtxOp::Mov, PtxType::U32)
            .dst(Operand::Reg(dst))
            .src(immediate);
        self.instructions.push(load);
    }
}