#![allow(
clippy::cast_sign_loss,
clippy::cast_lossless,
clippy::doc_markdown,
clippy::field_reassign_with_default
)]
use wgpu::util::DeviceExt;
use crate::decompress::{ChunkUpload, CHUNK_Z, OCC_WORDS_PER_COLUMN};
/// CPU-side description of a chunk grid, consumed by [`GpuGridResident::upload`].
///
/// The grid is an axis-aligned box of chunk slots starting at `origin_chunk`
/// and extending `chunks_dims` slots along each axis.
pub struct GridUpload {
/// Chunk edge length in columns; `upload` sizes each chunk as `vsid * vsid`
/// columns and asserts that every chunk's own `vsid` equals this value.
pub vsid: u32,
/// Chunk index of the grid's minimum corner; subtracted from a chunk index
/// to obtain its offset inside the grid.
pub origin_chunk: [i32; 3],
/// Grid extent in chunks along x, y and z.
pub chunks_dims: [u32; 3],
/// Pool dimensions. Not read by the code in this file; see
/// `default_pool_dims` for the power-of-two rounding of `chunks_dims`.
/// NOTE(review): the consumer of this field is outside this file — confirm its meaning there.
pub pool_dims: [u32; 3],
/// `(chunk index, chunk payload)` pairs. Entries whose index lies outside
/// the grid box are skipped by `upload`.
pub chunks: Vec<([i32; 3], ChunkUpload)>,
}
impl GridUpload {
#[must_use]
pub fn total_chunks(&self) -> u32 {
self.chunks_dims[0] * self.chunks_dims[1] * self.chunks_dims[2]
}
#[must_use]
pub fn default_pool_dims(chunks_dims: [u32; 3]) -> [u32; 3] {
[
ceil_pow2(chunks_dims[0]),
ceil_pow2(chunks_dims[1]),
ceil_pow2(chunks_dims[2]),
]
}
#[must_use]
pub fn meta_idx_of(&self, chunk_idx: [i32; 3]) -> Option<u32> {
let dx = chunk_idx[0] - self.origin_chunk[0];
let dy = chunk_idx[1] - self.origin_chunk[1];
let dz = chunk_idx[2] - self.origin_chunk[2];
if dx < 0
|| dy < 0
|| dz < 0
|| (dx as u32) >= self.chunks_dims[0]
|| (dy as u32) >= self.chunks_dims[1]
|| (dz as u32) >= self.chunks_dims[2]
{
return None;
}
Some(
(dx as u32)
+ (dy as u32) * self.chunks_dims[0]
+ (dz as u32) * self.chunks_dims[0] * self.chunks_dims[1],
)
}
}
/// GPU-resident copy of a chunk grid: the storage buffers created by
/// [`GpuGridResident::upload`] plus the metadata needed to index them.
pub struct GpuGridResident {
/// Chunk edge length in columns, copied from the upload.
pub vsid: u32,
/// Chunk index of the grid's minimum corner, copied from the upload.
pub origin_chunk: [i32; 3],
/// Grid extent in chunks along each axis, copied from the upload.
pub chunks_dims: [u32; 3],
/// Number of chunk slots, as returned by `GridUpload::total_chunks` at upload time.
pub total_chunks: u32,
/// Occupancy words: `vsid * vsid * OCC_WORDS_PER_COLUMN` `u32`s per chunk
/// slot, slots ordered by meta index. Slots with no uploaded chunk are zero.
pub occupancy: wgpu::Buffer,
/// Colour offset tables: `vsid * vsid + 1` `u32`s per chunk slot, slots
/// ordered by meta index. Slots with no uploaded chunk are zero.
pub color_offsets: wgpu::Buffer,
/// Colour words of all uploaded chunks, concatenated in the order the chunks
/// appear in the upload; a single zero word when no chunk has colours.
pub colors: wgpu::Buffer,
/// For each chunk slot, the index into `colors` at which that chunk's colour
/// words begin (zero for slots with no uploaded chunk).
pub chunk_colors_base: wgpu::Buffer,
/// Bitmask with one bit per chunk slot (32 slots per `u32`); a bit is set
/// when the slot's chunk has at least one colour word.
pub chunk_occupancy: wgpu::Buffer,
/// Size in bytes of the data uploaded to `occupancy`.
pub occupancy_bytes: u64,
/// Size in bytes of the data uploaded to `color_offsets`.
pub color_offsets_bytes: u64,
/// Size in bytes of the data uploaded to `colors` (includes the padding word
/// when no chunk has colours).
pub colors_bytes: u64,
}
impl GpuGridResident {
    /// Packs every in-grid chunk of `info` into flat host arrays and uploads
    /// them to `device` as storage buffers.
    ///
    /// Per chunk slot (indexed by [`GridUpload::meta_idx_of`]) the arrays hold
    /// `vsid * vsid * OCC_WORDS_PER_COLUMN` occupancy words and
    /// `vsid * vsid + 1` colour-offset words. Colour words are appended to one
    /// shared array in upload order, with each slot's start recorded in
    /// `chunk_colors_base`. Chunks whose index lies outside the grid are
    /// skipped; slots that receive no chunk stay zeroed.
    ///
    /// # Panics
    ///
    /// Panics if a chunk's `vsid` differs from the grid's, if a chunk's
    /// occupancy or colour-offset array does not have the per-chunk length
    /// above, or if the running colour count no longer fits in `u32`.
    pub fn upload(device: &wgpu::Device, info: &GridUpload) -> Self {
        let vsid = info.vsid;
        let side = vsid as usize;
        let cols_per_chunk = side * side;
        let occ_words_per_chunk = cols_per_chunk * (OCC_WORDS_PER_COLUMN as usize);
        let offsets_words_per_chunk = cols_per_chunk + 1;
        let total_chunks = info.total_chunks();
        let slot_count = total_chunks as usize;

        let mut occupancy = vec![0u32; slot_count * occ_words_per_chunk];
        let mut color_offsets = vec![0u32; slot_count * offsets_words_per_chunk];
        let mut chunk_colors_base = vec![0u32; slot_count];
        let mut chunk_occupancy = vec![0u32; slot_count.div_ceil(32)];
        let mut colors: Vec<u32> = Vec::new();

        for (chunk_idx, chunk) in &info.chunks {
            let Some(meta_idx) = info.meta_idx_of(*chunk_idx) else {
                continue;
            };
            assert_eq!(
                chunk.vsid, vsid,
                "GpuGridResident: chunk vsid {} disagrees with grid vsid {}",
                chunk.vsid, vsid,
            );
            // Checked up front so a malformed chunk is reported with context
            // rather than through the generic `copy_from_slice` length panic.
            assert_eq!(
                chunk.occupancy.len(),
                occ_words_per_chunk,
                "GpuGridResident: chunk occupancy has {} words, expected {}",
                chunk.occupancy.len(),
                occ_words_per_chunk,
            );
            assert_eq!(
                chunk.color_offsets.len(),
                offsets_words_per_chunk,
                "GpuGridResident: chunk color_offsets has {} words, expected {}",
                chunk.color_offsets.len(),
                offsets_words_per_chunk,
            );

            let slot = meta_idx as usize;

            let occ_start = slot * occ_words_per_chunk;
            occupancy[occ_start..occ_start + occ_words_per_chunk]
                .copy_from_slice(&chunk.occupancy);

            let off_start = slot * offsets_words_per_chunk;
            color_offsets[off_start..off_start + offsets_words_per_chunk]
                .copy_from_slice(&chunk.color_offsets);

            // The base is the colour count *before* this chunk is appended.
            chunk_colors_base[slot] = u32::try_from(colors.len()).expect("colours fit in u32");
            colors.extend_from_slice(&chunk.colors);

            if !chunk.colors.is_empty() {
                chunk_occupancy[slot >> 5] |= 1u32 << (slot & 31);
            }
        }

        // Keep the colour array non-empty by padding it with one zero word.
        if colors.is_empty() {
            colors.push(0);
        }

        let byte_len = |words: &[u32]| std::mem::size_of_val(words) as u64;
        let occupancy_bytes = byte_len(&occupancy);
        let color_offsets_bytes = byte_len(&color_offsets);
        let colors_bytes = byte_len(&colors);

        Self {
            vsid,
            origin_chunk: info.origin_chunk,
            chunks_dims: info.chunks_dims,
            total_chunks,
            occupancy: create_storage(device, "roxlap-gpu grid.occupancy", &occupancy),
            color_offsets: create_storage(
                device,
                "roxlap-gpu grid.color_offsets",
                &color_offsets,
            ),
            colors: create_storage(device, "roxlap-gpu grid.colors", &colors),
            chunk_colors_base: create_storage(
                device,
                "roxlap-gpu grid.chunk_colors_base",
                &chunk_colors_base,
            ),
            chunk_occupancy: create_storage(
                device,
                "roxlap-gpu grid.chunk_occupancy",
                &chunk_occupancy,
            ),
            occupancy_bytes,
            color_offsets_bytes,
            colors_bytes,
        }
    }

    /// Total size in bytes of the data uploaded to all five buffers.
    ///
    /// The sizes of `chunk_colors_base` (one `u32` per slot) and
    /// `chunk_occupancy` (one `u32` per 32 slots, rounded up) are derived from
    /// `total_chunks`; the other three are the recorded `*_bytes` fields.
    #[must_use]
    pub fn resident_bytes(&self) -> u64 {
        const WORD_BYTES: u64 = 4;
        let slots = u64::from(self.total_chunks);
        let base_table_bytes = slots * WORD_BYTES;
        let mask_bytes = slots.div_ceil(32) * WORD_BYTES;
        self.occupancy_bytes
            + self.color_offsets_bytes
            + self.colors_bytes
            + base_table_bytes
            + mask_bytes
    }
}
/// Creates a `STORAGE`-usage buffer on `device`, labelled `label` and
/// initialised with the bytes of `data`.
fn create_storage(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
    let descriptor = wgpu::util::BufferInitDescriptor {
        label: Some(label),
        contents: bytemuck::cast_slice(data),
        usage: wgpu::BufferUsages::STORAGE,
    };
    device.create_buffer_init(&descriptor)
}
/// Smallest power of two greater than or equal to `n`; `0` and `1` both map to `1`.
///
/// # Panics
///
/// Panics if `n > 2^31`, because no power of two at or above `n` fits in a
/// `u32`. (A raw `1 << 32` would instead overflow the shift and, without
/// overflow checks, silently produce `1`.)
#[must_use]
pub fn ceil_pow2(n: u32) -> u32 {
    n.checked_next_power_of_two()
        .expect("ceil_pow2: no power of two >= n fits in u32")
}
/// Axis-aligned bounding box of a set of chunk indices.
///
/// Returns `(min_corner, dims)` where `dims[i] = max[i] - min[i] + 1`, or
/// `None` when `chunks` yields nothing.
///
/// # Panics
///
/// Panics if the indices span the entire `i32` range on some axis, since an
/// extent of `2^32` does not fit in `u32`.
#[must_use]
pub fn bounding_box_of(
    chunks: impl IntoIterator<Item = [i32; 3]>,
) -> Option<([i32; 3], [u32; 3])> {
    let mut indices = chunks.into_iter();
    // Seeding the fold with the first element makes the empty case fall out
    // of `?` and avoids sentinel MIN/MAX values.
    let first = indices.next()?;
    let (mins, maxs) = indices.fold((first, first), |(mut mins, mut maxs), idx| {
        for ((lo, hi), v) in mins.iter_mut().zip(maxs.iter_mut()).zip(idx.iter()) {
            *lo = (*lo).min(*v);
            *hi = (*hi).max(*v);
        }
        (mins, maxs)
    });

    // `abs_diff` yields the exact span as `u32`; `max - min` in `i32` would
    // overflow for spans larger than `i32::MAX`.
    let dims = std::array::from_fn(|axis| {
        maxs[axis]
            .abs_diff(mins[axis])
            .checked_add(1)
            .expect("bounding_box_of: axis extent does not fit in u32")
    });
    Some((mins, dims))
}
/// Number of occupancy `u32` words in one chunk: `vsid * vsid` columns times
/// `OCC_WORDS_PER_COLUMN` words per column.
#[must_use]
pub fn occ_words_per_chunk(vsid: u32) -> u32 {
    let columns = vsid * vsid;
    columns * OCC_WORDS_PER_COLUMN
}
/// Re-export of `CHUNK_Z` from `crate::decompress` under a grid-specific name.
pub const GRID_CHUNK_Z: u32 = CHUNK_Z;