use super::device::GpuDevice;
/// A GPU storage buffer paired with its logical element count.
///
/// Elements are treated as 4-byte values: the constructors accept `f32` or
/// `u32` slices, and byte sizes are derived from `len * size_of::<f32>()`.
pub struct GpuBuffer {
/// The underlying wgpu buffer handle.
pub(crate) buffer: wgpu::Buffer,
/// Number of 4-byte elements in the buffer (not the size in bytes).
pub(crate) len: usize,
}
impl GpuBuffer {
pub fn from_slice(device: &GpuDevice, data: &[f32]) -> Self {
use wgpu::util::DeviceExt;
let buffer = device
.device
.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("tang-gpu storage"),
contents: bytemuck::cast_slice(data),
usage: wgpu::BufferUsages::STORAGE
| wgpu::BufferUsages::COPY_SRC
| wgpu::BufferUsages::COPY_DST,
});
Self {
buffer,
len: data.len(),
}
}
pub fn from_u32_slice(device: &GpuDevice, data: &[u32]) -> Self {
use wgpu::util::DeviceExt;
let buffer = device
.device
.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("tang-gpu storage u32"),
contents: bytemuck::cast_slice(data),
usage: wgpu::BufferUsages::STORAGE
| wgpu::BufferUsages::COPY_SRC
| wgpu::BufferUsages::COPY_DST,
});
Self {
buffer,
len: data.len(),
}
}
pub fn uninit(device: &GpuDevice, len: usize) -> Self {
let buffer = device.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("tang-gpu storage"),
size: (len * std::mem::size_of::<f32>()) as u64,
usage: wgpu::BufferUsages::STORAGE
| wgpu::BufferUsages::COPY_SRC
| wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
Self { buffer, len }
}
pub async fn to_vec(&self, device: &GpuDevice) -> Vec<f32> {
let size = (self.len * std::mem::size_of::<f32>()) as u64;
let staging = device.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("tang-gpu staging"),
size,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let mut encoder = device
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("tang-gpu download"),
});
encoder.copy_buffer_to_buffer(&self.buffer, 0, &staging, 0, size);
device.queue.submit(std::iter::once(encoder.finish()));
let slice = staging.slice(..);
let (tx, rx) = std::sync::mpsc::channel();
slice.map_async(wgpu::MapMode::Read, move |result| {
tx.send(result).ok();
});
device.device.poll(wgpu::Maintain::Wait);
rx.recv().unwrap().unwrap();
let data = slice.get_mapped_range();
let result: Vec<f32> = bytemuck::cast_slice(&data).to_vec();
drop(data);
staging.unmap();
result
}
pub fn to_vec_sync(&self, device: &GpuDevice) -> Vec<f32> {
pollster::block_on(self.to_vec(device))
}
pub fn len(&self) -> usize {
self.len
}
pub fn is_empty(&self) -> bool {
self.len == 0
}
pub fn byte_size(&self) -> u64 {
(self.len * std::mem::size_of::<f32>()) as u64
}
pub fn clone_gpu(&self, device: &GpuDevice) -> GpuBuffer {
let dst = Self::uninit(device, self.len);
let mut encoder = device
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("tang-gpu clone"),
});
encoder.copy_buffer_to_buffer(&self.buffer, 0, &dst.buffer, 0, self.byte_size());
device.queue.submit(std::iter::once(encoder.finish()));
dst
}
pub fn clone_gpu_batched(&self, device: &GpuDevice, cache: &mut super::kernel::KernelCache) -> GpuBuffer {
let dst = Self::uninit(device, self.len);
let mut encoder = device
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("tang-gpu clone"),
});
encoder.copy_buffer_to_buffer(&self.buffer, 0, &dst.buffer, 0, self.byte_size());
cache.submit_or_enqueue(device, encoder.finish());
dst
}
}