use super::super::instructions::{Operand, PtxInstruction, PtxOp};
use super::super::registers::VirtualReg;
use super::super::types::{PtxStateSpace, PtxType};
use super::core::KernelBuilderCore;
/// Memory-access instruction emitters layered on top of [`KernelBuilderCore`].
///
/// Every method appends exactly one [`PtxInstruction`] to the builder's
/// instruction list. The load helpers additionally allocate a fresh virtual
/// register for the loaded value and return it. None of the helpers inspect
/// the type of the registers passed in by the caller.
pub trait PtxMemory: KernelBuilderCore {
    /// Appends an `Ld` instruction typed `F32` in the `Global` state space.
    ///
    /// `addr` is the only source operand. Returns the newly allocated `F32`
    /// virtual register that is set as the destination operand.
    fn ld_global_f32(&mut self, addr: VirtualReg) -> VirtualReg {
        let result = self.registers_mut().allocate_virtual(PtxType::F32);
        let load = PtxInstruction::new(PtxOp::Ld, PtxType::F32)
            .space(PtxStateSpace::Global)
            .dst(Operand::Reg(result))
            .src(Operand::Reg(addr));
        self.instructions_mut().push(load);
        result
    }

    /// Appends an `St` instruction typed `F32` in the `Global` state space.
    ///
    /// Source operands are added in the order `addr`, then `val`; no
    /// destination operand is set.
    fn st_global_f32(&mut self, addr: VirtualReg, val: VirtualReg) {
        let store = PtxInstruction::new(PtxOp::St, PtxType::F32)
            .space(PtxStateSpace::Global)
            .src(Operand::Reg(addr))
            .src(Operand::Reg(val));
        self.instructions_mut().push(store);
    }

    /// Appends an `Ld` instruction typed `U32` in the `Global` state space.
    ///
    /// `addr` is the only source operand. Returns the newly allocated `U32`
    /// virtual register that is set as the destination operand.
    fn ld_global_u32(&mut self, addr: VirtualReg) -> VirtualReg {
        let result = self.registers_mut().allocate_virtual(PtxType::U32);
        let load = PtxInstruction::new(PtxOp::Ld, PtxType::U32)
            .space(PtxStateSpace::Global)
            .dst(Operand::Reg(result))
            .src(Operand::Reg(addr));
        self.instructions_mut().push(load);
        result
    }

    /// Appends an `St` instruction typed `U32` in the `Global` state space.
    ///
    /// Source operands are added in the order `addr`, then `val`; no
    /// destination operand is set.
    fn st_global_u32(&mut self, addr: VirtualReg, val: VirtualReg) {
        let store = PtxInstruction::new(PtxOp::St, PtxType::U32)
            .space(PtxStateSpace::Global)
            .src(Operand::Reg(addr))
            .src(Operand::Reg(val));
        self.instructions_mut().push(store);
    }

    /// Appends an `Ld` instruction typed `U8` in the `Global` state space.
    ///
    /// The instruction type is `U8`, but the destination register returned to
    /// the caller is allocated as `U32`.
    // NOTE(review): presumably relies on PTX accepting a destination register
    // wider than the load type - confirm against the emitter.
    fn ld_global_u8(&mut self, addr: VirtualReg) -> VirtualReg {
        let widened = self.registers_mut().allocate_virtual(PtxType::U32);
        let load = PtxInstruction::new(PtxOp::Ld, PtxType::U8)
            .space(PtxStateSpace::Global)
            .dst(Operand::Reg(widened))
            .src(Operand::Reg(addr));
        self.instructions_mut().push(load);
        widened
    }

    /// Appends an `Ld` instruction typed `U16` in the `Global` state space.
    ///
    /// The instruction type is `U16`, but the destination register returned
    /// to the caller is allocated as `U32`.
    // NOTE(review): presumably relies on PTX accepting a destination register
    // wider than the load type - confirm against the emitter.
    fn ld_global_u16(&mut self, addr: VirtualReg) -> VirtualReg {
        let widened = self.registers_mut().allocate_virtual(PtxType::U32);
        let load = PtxInstruction::new(PtxOp::Ld, PtxType::U16)
            .space(PtxStateSpace::Global)
            .dst(Operand::Reg(widened))
            .src(Operand::Reg(addr));
        self.instructions_mut().push(load);
        widened
    }

    /// Appends an `Ld` instruction typed `F32` in the `Shared` state space.
    ///
    /// `addr` is the only source operand. Returns the newly allocated `F32`
    /// virtual register that is set as the destination operand.
    fn ld_shared_f32(&mut self, addr: VirtualReg) -> VirtualReg {
        let result = self.registers_mut().allocate_virtual(PtxType::F32);
        let load = PtxInstruction::new(PtxOp::Ld, PtxType::F32)
            .space(PtxStateSpace::Shared)
            .dst(Operand::Reg(result))
            .src(Operand::Reg(addr));
        self.instructions_mut().push(load);
        result
    }

    /// Appends an `St` instruction typed `F32` in the `Shared` state space.
    ///
    /// Source operands are added in the order `addr`, then `val`; no
    /// destination operand is set.
    fn st_shared_f32(&mut self, addr: VirtualReg, val: VirtualReg) {
        let store = PtxInstruction::new(PtxOp::St, PtxType::F32)
            .space(PtxStateSpace::Shared)
            .src(Operand::Reg(addr))
            .src(Operand::Reg(val));
        self.instructions_mut().push(store);
    }

    /// Appends an `Ld` instruction typed `U32` in the `Shared` state space.
    ///
    /// `addr` is the only source operand. Returns the newly allocated `U32`
    /// virtual register that is set as the destination operand.
    fn ld_shared_u32(&mut self, addr: VirtualReg) -> VirtualReg {
        let result = self.registers_mut().allocate_virtual(PtxType::U32);
        let load = PtxInstruction::new(PtxOp::Ld, PtxType::U32)
            .space(PtxStateSpace::Shared)
            .dst(Operand::Reg(result))
            .src(Operand::Reg(addr));
        self.instructions_mut().push(load);
        result
    }

    /// Appends an `St` instruction typed `U32` in the `Shared` state space.
    ///
    /// Source operands are added in the order `addr`, then `val`; no
    /// destination operand is set.
    fn st_shared_u32(&mut self, addr: VirtualReg, val: VirtualReg) {
        let store = PtxInstruction::new(PtxOp::St, PtxType::U32)
            .space(PtxStateSpace::Shared)
            .src(Operand::Reg(addr))
            .src(Operand::Reg(val));
        self.instructions_mut().push(store);
    }

    /// Appends an `LdVolatile` instruction typed `U32` in the `Shared` state
    /// space.
    ///
    /// Identical in shape to [`PtxMemory::ld_shared_u32`] except that the
    /// opcode is `PtxOp::LdVolatile` rather than `PtxOp::Ld`. Returns the
    /// newly allocated `U32` destination register.
    fn ld_shared_u32_volatile(&mut self, addr: VirtualReg) -> VirtualReg {
        let result = self.registers_mut().allocate_virtual(PtxType::U32);
        let load = PtxInstruction::new(PtxOp::LdVolatile, PtxType::U32)
            .space(PtxStateSpace::Shared)
            .dst(Operand::Reg(result))
            .src(Operand::Reg(addr));
        self.instructions_mut().push(load);
        result
    }

    /// Appends a `Prefetch` instruction in the `Global` state space.
    ///
    /// The instruction is typed `U8`, takes `addr` as its only source operand
    /// and has no destination operand.
    // NOTE(review): the cache level implied by the method name is not encoded
    // in this call; presumably the emitter maps `PtxOp::Prefetch` to the L2
    // form - confirm.
    fn prefetch_global_l2(&mut self, addr: VirtualReg) {
        let prefetch = PtxInstruction::new(PtxOp::Prefetch, PtxType::U8)
            .space(PtxStateSpace::Global)
            .src(Operand::Reg(addr));
        self.instructions_mut().push(prefetch);
    }

    /// Appends an `St` instruction in the `Shared` state space for a 16-bit
    /// value.
    ///
    /// The instruction is typed `B16` (not `F16`). Source operands are added
    /// in the order `addr`, then `val`; no destination operand is set.
    fn st_shared_f16(&mut self, addr: VirtualReg, val: VirtualReg) {
        let store = PtxInstruction::new(PtxOp::St, PtxType::B16)
            .space(PtxStateSpace::Shared)
            .src(Operand::Reg(addr))
            .src(Operand::Reg(val));
        self.instructions_mut().push(store);
    }

    /// Appends an `Ld` instruction in the `Shared` state space for a 16-bit
    /// value.
    ///
    /// The instruction is typed `B16`, while the destination register
    /// returned to the caller is allocated as `F16`.
    fn ld_shared_f16(&mut self, addr: VirtualReg) -> VirtualReg {
        let half = self.registers_mut().allocate_virtual(PtxType::F16);
        let load = PtxInstruction::new(PtxOp::Ld, PtxType::B16)
            .space(PtxStateSpace::Shared)
            .dst(Operand::Reg(half))
            .src(Operand::Reg(addr));
        self.instructions_mut().push(load);
        half
    }
}
/// Blanket implementation: every sized type implementing
/// [`KernelBuilderCore`] receives all [`PtxMemory`] default methods without
/// any further code.
impl<T> PtxMemory for T where T: KernelBuilderCore {}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ptx::registers::RegisterAllocator;

    /// Minimal in-memory `KernelBuilderCore` used to observe which
    /// instructions the `PtxMemory` helpers append.
    struct MockBuilder {
        registers: RegisterAllocator,
        instructions: Vec<PtxInstruction>,
        labels: Vec<String>,
    }

    impl MockBuilder {
        /// Creates a builder with a fresh allocator and empty instruction
        /// and label lists.
        fn new() -> Self {
            Self {
                registers: RegisterAllocator::new(),
                instructions: Vec::new(),
                labels: Vec::new(),
            }
        }
    }

    impl KernelBuilderCore for MockBuilder {
        fn registers_mut(&mut self) -> &mut RegisterAllocator {
            &mut self.registers
        }

        fn instructions_mut(&mut self) -> &mut Vec<PtxInstruction> {
            &mut self.instructions
        }

        fn labels_mut(&mut self) -> &mut Vec<String> {
            &mut self.labels
        }
    }

    #[test]
    fn test_ld_st_global_f32() {
        let mut builder = MockBuilder::new();
        let addr = builder.registers.allocate_virtual(PtxType::U64);
        let val = builder.ld_global_f32(addr);
        builder.st_global_f32(addr, val);
        assert_eq!(builder.instructions.len(), 2);
        assert_eq!(builder.instructions[0].op, PtxOp::Ld);
        assert_eq!(builder.instructions[1].op, PtxOp::St);
    }

    #[test]
    fn test_shared_memory_ops() {
        let mut builder = MockBuilder::new();
        let addr = builder.registers.allocate_virtual(PtxType::U32);
        let val = builder.ld_shared_f32(addr);
        builder.st_shared_f32(addr, val);
        assert_eq!(builder.instructions.len(), 2);
        assert_eq!(builder.instructions[0].state_space, Some(PtxStateSpace::Shared));
        assert_eq!(builder.instructions[1].state_space, Some(PtxStateSpace::Shared));
    }

    #[test]
    fn test_global_u32_ops() {
        let mut builder = MockBuilder::new();
        let addr = builder.registers.allocate_virtual(PtxType::U64);
        let val = builder.ld_global_u32(addr);
        builder.st_global_u32(addr, val);
        assert_eq!(builder.instructions.len(), 2);
        assert_eq!(builder.instructions[0].ty, PtxType::U32);
        assert_eq!(builder.instructions[1].ty, PtxType::U32);
    }

    #[test]
    fn test_global_u8_load() {
        let mut builder = MockBuilder::new();
        let addr = builder.registers.allocate_virtual(PtxType::U64);
        let val = builder.ld_global_u8(addr);
        assert_eq!(builder.instructions.len(), 1);
        assert_eq!(builder.instructions[0].op, PtxOp::Ld);
        assert_eq!(builder.instructions[0].ty, PtxType::U8);
        assert_eq!(builder.instructions[0].state_space, Some(PtxStateSpace::Global));
        // The loaded byte lands in a 32-bit register.
        assert_eq!(val.ty(), PtxType::U32);
    }

    #[test]
    fn test_global_u16_load() {
        let mut builder = MockBuilder::new();
        let addr = builder.registers.allocate_virtual(PtxType::U64);
        let val = builder.ld_global_u16(addr);
        assert_eq!(builder.instructions.len(), 1);
        assert_eq!(builder.instructions[0].op, PtxOp::Ld);
        assert_eq!(builder.instructions[0].ty, PtxType::U16);
        assert_eq!(builder.instructions[0].state_space, Some(PtxStateSpace::Global));
        // The loaded half-word lands in a 32-bit register.
        assert_eq!(val.ty(), PtxType::U32);
    }

    #[test]
    fn test_shared_u32_ops() {
        let mut builder = MockBuilder::new();
        let addr = builder.registers.allocate_virtual(PtxType::U32);
        let val = builder.ld_shared_u32(addr);
        builder.st_shared_u32(addr, val);
        assert_eq!(builder.instructions.len(), 2);
        assert_eq!(builder.instructions[0].op, PtxOp::Ld);
        assert_eq!(builder.instructions[0].ty, PtxType::U32);
        assert_eq!(builder.instructions[0].state_space, Some(PtxStateSpace::Shared));
        assert_eq!(builder.instructions[1].op, PtxOp::St);
        assert_eq!(builder.instructions[1].state_space, Some(PtxStateSpace::Shared));
    }

    #[test]
    fn test_shared_u32_volatile_load() {
        let mut builder = MockBuilder::new();
        let addr = builder.registers.allocate_virtual(PtxType::U32);
        let val = builder.ld_shared_u32_volatile(addr);
        assert_eq!(builder.instructions.len(), 1);
        // The volatile variant must use its own opcode, not the plain load.
        assert_eq!(builder.instructions[0].op, PtxOp::LdVolatile);
        assert_eq!(builder.instructions[0].ty, PtxType::U32);
        assert_eq!(builder.instructions[0].state_space, Some(PtxStateSpace::Shared));
        assert_eq!(val.ty(), PtxType::U32);
    }

    #[test]
    fn test_prefetch_global_l2() {
        let mut builder = MockBuilder::new();
        let addr = builder.registers.allocate_virtual(PtxType::U64);
        builder.prefetch_global_l2(addr);
        assert_eq!(builder.instructions.len(), 1);
        assert_eq!(builder.instructions[0].op, PtxOp::Prefetch);
        assert_eq!(builder.instructions[0].ty, PtxType::U8);
        assert_eq!(builder.instructions[0].state_space, Some(PtxStateSpace::Global));
    }

    #[test]
    fn test_shared_f16_ops() {
        let mut builder = MockBuilder::new();
        let addr = builder.registers.allocate_virtual(PtxType::U32);
        let val = builder.ld_shared_f16(addr);
        builder.st_shared_f16(addr, val);
        assert_eq!(builder.instructions.len(), 2);
        // Both directions are emitted as untyped 16-bit accesses.
        assert_eq!(builder.instructions[0].op, PtxOp::Ld);
        assert_eq!(builder.instructions[0].ty, PtxType::B16);
        assert_eq!(builder.instructions[0].state_space, Some(PtxStateSpace::Shared));
        assert_eq!(builder.instructions[1].op, PtxOp::St);
        assert_eq!(builder.instructions[1].ty, PtxType::B16);
        assert_eq!(builder.instructions[1].state_space, Some(PtxStateSpace::Shared));
        // The register handed back to the caller is typed as a half float.
        assert_eq!(val.ty(), PtxType::F16);
    }
}