use gloss_img::DynImage;
use image::imageops::FilterType;
use image::{EncodableLayout, GenericImageView, ImageBuffer};
use log::{debug, warn};
use pollster::FutureExt;
use std::borrow::Cow;
use wgpu::{util::DeviceExt, CommandEncoderDescriptor, TextureFormat};
use gloss_utils::numerical;
use crate::{buffer::Buffer, mipmap::RenderMipmapGenerator};
#[cfg(feature = "burn-torch")]
use crate::error::CudaInteropError;
#[cfg(feature = "burn-torch")]
use cust_raw;
#[cfg(feature = "burn-torch")]
use std::sync::Arc;
#[cfg(feature = "burn-torch")]
use tch::Tensor;
#[cfg(feature = "burn-torch")]
use wgpu_cuda_interop::{vulkan_wgpu_interop::WgpuBufferCudaMem, AllocSize};
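/// Creation-time texture parameters (MSAA sample count, mip level count, and a scale
/// factor) that can be captured from and applied to a `wgpu::TextureDescriptor`.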
#[derive(Clone, Copy)]
pub struct TexParams {
pub sample_count: u32,
pub mip_level_count: u32,
pub scale_factor: u32,
}
impl Default for TexParams {
fn default() -> Self {
Self {
sample_count: 1,
mip_level_count: 1,
scale_factor: 1,
}
}
}
impl TexParams {
pub fn from_desc(desc: &wgpu::TextureDescriptor) -> Self {
Self {
sample_count: desc.sample_count,
mip_level_count: desc.mip_level_count,
scale_factor: 1,
}
}
pub fn apply(&self, desc: &mut wgpu::TextureDescriptor) {
desc.sample_count = self.sample_count;
desc.mip_level_count = self.mip_level_count;
}
}
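/// A `wgpu` texture bundled with its default view, sampler, and the [`TexParams`] it was
/// created with. With the `burn-torch` feature it can also hold a CUDA-backed staging
/// buffer for uploads from torch tensors.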
#[derive(Clone)]
pub struct Texture {
pub texture: wgpu::Texture,
pub view: wgpu::TextureView,
    pub sampler: wgpu::Sampler,
    pub tex_params: TexParams,
#[cfg(feature = "burn-torch")]
pub staging_buffer_backed_by_cuda_mem: Option<Arc<WgpuBufferCudaMem>>,
}
impl Texture {
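    /// Creates an empty 2D texture with the given size, format, and usage; `tex_params`
    /// supplies the sample count and mip level count.
    ///
    /// A minimal usage sketch (illustrative only; assumes an already initialized `device`):
    ///
    /// ```ignore
    /// let tex = Texture::new(
    ///     &device,
    ///     1024,
    ///     1024,
    ///     wgpu::TextureFormat::Rgba8UnormSrgb,
    ///     wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
    ///     TexParams::default(),
    /// );
    /// ```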
pub fn new(
device: &wgpu::Device,
width: u32,
height: u32,
format: wgpu::TextureFormat,
usage: wgpu::TextureUsages,
tex_params: TexParams,
) -> Self {
debug!("New texture");
let mut texture_desc = wgpu::TextureDescriptor {
size: wgpu::Extent3d {
width,
height,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format,
usage,
label: None,
view_formats: if cfg!(target_arch = "wasm32") {
&[]
} else {
&[format.add_srgb_suffix(), format.remove_srgb_suffix()]
},
};
tex_params.apply(&mut texture_desc);
let texture = device.create_texture(&texture_desc);
let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
mipmap_filter: wgpu::FilterMode::Linear,
..Default::default()
});
Self {
texture,
view,
sampler,
tex_params,
#[cfg(feature = "burn-torch")]
staging_buffer_backed_by_cuda_mem: None,
}
}
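    /// Creates a texture by decoding an encoded image (e.g. PNG or JPEG) from memory.
    ///
    /// # Panics
    /// Panics if the bytes cannot be decoded as an image.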
pub fn from_bytes(device: &wgpu::Device, queue: &wgpu::Queue, bytes: &[u8], label: &str) -> Self {
let img = image::load_from_memory(bytes).unwrap();
Self::from_image(device, queue, &img, Some(label))
}
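    /// Creates an `Rgba8UnormSrgb` texture from a decoded image, converting it to RGBA8
    /// and uploading it through `Queue::write_texture`. No mipmaps are generated.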
pub fn from_image(device: &wgpu::Device, queue: &wgpu::Queue, img: &image::DynamicImage, label: Option<&str>) -> Self {
let rgba = img.to_rgba8();
let dimensions = img.dimensions();
let size = wgpu::Extent3d {
width: dimensions.0,
height: dimensions.1,
depth_or_array_layers: 1,
};
let format = wgpu::TextureFormat::Rgba8UnormSrgb;
let desc = wgpu::TextureDescriptor {
label,
size,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format,
usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
view_formats: if cfg!(target_arch = "wasm32") {
&[]
} else {
&[format.add_srgb_suffix(), format.remove_srgb_suffix()]
},
};
let tex_params = TexParams::from_desc(&desc);
let texture = device.create_texture(&desc);
queue.write_texture(
wgpu::TexelCopyTextureInfo {
aspect: wgpu::TextureAspect::All,
texture: &texture,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
},
&rgba,
wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(4 * dimensions.0),
rows_per_image: Some(dimensions.1),
},
size,
);
let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
..Default::default()
});
Self {
texture,
view,
sampler,
tex_params,
#[cfg(feature = "burn-torch")]
staging_buffer_backed_by_cuda_mem: None,
}
}
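    /// Loads an image from disk and uploads it as a texture, blocking on the async
    /// [`Self::from_img`]. Mipmap generation is requested, but since no mipmapper is
    /// passed, the upper mip levels are left unfilled and a warning is logged.
    ///
    /// A minimal usage sketch (illustrative only; the path and the initialized `device`
    /// and `queue` are assumptions):
    ///
    /// ```ignore
    /// let tex = Texture::from_path("assets/albedo.png", &device, &queue, true);
    /// ```
    ///
    /// # Panics
    /// Panics if the file cannot be opened or decoded.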
pub fn from_path(path: &str, device: &wgpu::Device, queue: &wgpu::Queue, is_srgb: bool) -> Self {
let img = image::ImageReader::open(path).unwrap().decode().unwrap();
Self::from_img(
&img.try_into().unwrap(),
device,
queue,
is_srgb,
true,
            false,
            None,
None,
)
.block_on()
.unwrap()
}
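    /// Creates a texture from a [`DynImage`], optionally generating mipmaps either on the
    /// CPU (by repeated downscaling) or on the GPU via the provided `mipmaper`.
    ///
    /// RGB images are expanded to RGBA, since `wgpu` has no 3-channel 8-bit format. If
    /// `staging_buffer` is provided, the upload goes through it instead of
    /// `Queue::write_texture`.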
#[allow(clippy::missing_errors_doc)]
#[allow(clippy::too_many_lines)]
#[allow(clippy::too_many_arguments)]
pub async fn from_img(
img: &DynImage,
device: &wgpu::Device,
queue: &wgpu::Queue,
is_srgb: bool,
generate_mipmaps: bool,
mipmap_generation_cpu: bool,
staging_buffer: Option<&Buffer>,
mipmaper: Option<&RenderMipmapGenerator>,
) -> Result<Self, Box<dyn std::error::Error>> {
let dimensions = img.dimensions();
let nr_channels = img.color().channel_count();
let bytes_per_channel = img.color().bytes_per_pixel() / nr_channels;
        assert!(bytes_per_channel == 1, "Only textures with 1 byte per channel are supported.");
let img_vec;
let img_buf = match nr_channels {
1 | 2 | 4 => img.as_bytes(),
3 => {
img_vec = img.to_rgba8().into_vec();
img_vec.as_bytes()
}
_ => panic!("Format with more than 4 channels not supported"),
};
let tex_format = Self::format_from_img(img, is_srgb);
let size = wgpu::Extent3d {
width: dimensions.0,
height: dimensions.1,
depth_or_array_layers: 1,
};
let mut nr_mip_maps = 1;
let mut usages = wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST;
if generate_mipmaps {
nr_mip_maps = size.max_mips(wgpu::TextureDimension::D2);
}
if mipmaper.is_some() && generate_mipmaps {
usages |= RenderMipmapGenerator::required_usage();
}
let desc = wgpu::TextureDescriptor {
label: None,
size,
mip_level_count: nr_mip_maps,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: tex_format,
usage: usages,
view_formats: if cfg!(target_arch = "wasm32") {
&[]
} else {
&[tex_format.add_srgb_suffix(), tex_format.remove_srgb_suffix()]
},
};
let tex_params = TexParams::from_desc(&desc);
let texture = device.create_texture(&desc);
Self::upload_single_mip(&texture, device, queue, &desc, img_buf, staging_buffer, 0).await?;
if generate_mipmaps {
Self::generate_mipmaps(
img,
&texture,
device,
queue,
&desc,
nr_mip_maps,
mipmap_generation_cpu,
staging_buffer,
mipmaper,
)
.await?;
}
let view = texture.create_view(&wgpu::TextureViewDescriptor {
mip_level_count: Some(nr_mip_maps),
..Default::default()
});
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
..Default::default()
});
Ok(Self {
texture,
view,
sampler,
tex_params,
#[cfg(feature = "burn-torch")]
staging_buffer_backed_by_cuda_mem: None,
})
}
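    /// Re-uploads image data into the existing texture and recreates the view. The
    /// texture itself is not reallocated, so the image is expected to match the current
    /// texture size and format.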
#[allow(clippy::missing_errors_doc)]
#[allow(clippy::too_many_arguments)]
pub async fn update_from_img(
&mut self,
img: &DynImage,
device: &wgpu::Device,
queue: &wgpu::Queue,
is_srgb: bool,
generate_mipmaps: bool,
mipmap_generation_cpu: bool,
staging_buffer: Option<&Buffer>,
mipmaper: Option<&RenderMipmapGenerator>,
) -> Result<(), Box<dyn std::error::Error>> {
let nr_channels = img.color().channel_count();
let bytes_per_channel = img.color().bytes_per_pixel() / nr_channels;
        assert!(bytes_per_channel == 1, "Only textures with 1 byte per channel are supported.");
let img_vec;
let img_buf = match nr_channels {
1 | 2 | 4 => img.as_bytes(),
3 => {
img_vec = img.to_rgba8().into_vec();
img_vec.as_bytes()
}
_ => panic!("Format with more than 4 channels not supported"),
};
let size = Self::extent_from_img(img);
let tex_format = Self::format_from_img(img, is_srgb);
let mut nr_mip_maps = 1;
let mut usages = wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST;
if generate_mipmaps {
nr_mip_maps = size.max_mips(wgpu::TextureDimension::D2);
}
if mipmaper.is_some() && generate_mipmaps {
usages |= RenderMipmapGenerator::required_usage();
}
let desc = wgpu::TextureDescriptor {
label: None,
size,
mip_level_count: nr_mip_maps,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: tex_format,
usage: usages,
view_formats: if cfg!(target_arch = "wasm32") {
&[]
} else {
&[tex_format.add_srgb_suffix(), tex_format.remove_srgb_suffix()]
},
};
Self::upload_single_mip(&self.texture, device, queue, &desc, img_buf, staging_buffer, 0).await?;
if generate_mipmaps {
Self::generate_mipmaps(
img,
&self.texture,
device,
queue,
&desc,
nr_mip_maps,
mipmap_generation_cpu,
staging_buffer,
mipmaper,
)
.await?;
}
let view = self.texture.create_view(&wgpu::TextureViewDescriptor {
mip_level_count: Some(nr_mip_maps),
..Default::default()
});
self.view = view;
Ok(())
}
pub fn nr_channels(&self) -> u32 {
self.texture.format().components().into()
}
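    /// Fills mip levels `1..nr_mip_maps` of `texture`, either by downscaling `img` on the
    /// CPU with a triangle filter, or by dispatching the provided GPU `mipmaper`. When the
    /// GPU path is selected but no mipmapper is provided, only a warning is logged.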
#[allow(clippy::too_many_arguments)]
#[allow(clippy::missing_errors_doc)]
pub async fn generate_mipmaps(
img: &DynImage,
texture: &wgpu::Texture,
device: &wgpu::Device,
queue: &wgpu::Queue,
desc: &wgpu::TextureDescriptor<'_>,
nr_mip_maps: u32,
mipmap_generation_cpu: bool,
staging_buffer: Option<&Buffer>,
mipmaper: Option<&RenderMipmapGenerator>,
) -> Result<(), Box<dyn std::error::Error>> {
let nr_channels = img.color().channel_count();
if mipmap_generation_cpu {
            // Placeholder image; it is overwritten on the first iteration of the loop below.
            let mut img_mip = DynImage::new(1, 1, image::ColorType::L8);
for mip_lvl in 1..nr_mip_maps {
let mip_size = desc.mip_level_size(mip_lvl).unwrap();
let prev_img_mip = if mip_lvl == 1 { img } else { &img_mip };
img_mip = prev_img_mip.resize_exact(mip_size.width, mip_size.height, FilterType::Triangle);
debug!("mip lvl {mip_lvl} has size {mip_size:?}");
let img_mip_vec;
let img_mip_buf = match nr_channels {
1 | 2 | 4 => img_mip.as_bytes(),
3 => {
img_mip_vec = img_mip.to_rgba8().into_vec();
img_mip_vec.as_bytes()
}
_ => panic!("Format with more than 4 channels not supported"),
};
Self::upload_single_mip(texture, device, queue, desc, img_mip_buf, staging_buffer, mip_lvl).await?;
}
        } else if let Some(mipmaper) = mipmaper {
            let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor::default());
            mipmaper.generate(device, &mut encoder, texture, desc).unwrap();
            queue.submit(std::iter::once(encoder.finish()));
        } else {
            warn!("Couldn't generate mipmaps since the mipmapper was not provided");
        }
Ok(())
}
pub fn extent_from_img(img: &DynImage) -> wgpu::Extent3d {
let dimensions = img.dimensions();
wgpu::Extent3d {
width: dimensions.0,
height: dimensions.1,
depth_or_array_layers: 1,
}
}
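    /// Maps an image's channel count to a `wgpu` texture format (1 -> `R8Unorm`,
    /// 2 -> `Rg8Unorm`, 3 or 4 -> `Rgba8Unorm`), optionally with an sRGB suffix.
    ///
    /// # Panics
    /// Panics if the image has more than 4 channels or more than 1 byte per channel.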
pub fn format_from_img(img: &DynImage, is_srgb: bool) -> wgpu::TextureFormat {
let nr_channels = img.color().channel_count();
let bytes_per_channel = img.color().bytes_per_pixel() / nr_channels;
        assert!(bytes_per_channel == 1, "Only textures with 1 byte per channel are supported.");
let mut tex_format = match nr_channels {
1 => wgpu::TextureFormat::R8Unorm,
2 => wgpu::TextureFormat::Rg8Unorm,
3 | 4 => wgpu::TextureFormat::Rgba8Unorm,
_ => panic!("Format with more than 4 channels not supported"),
};
if is_srgb {
tex_format = tex_format.add_srgb_suffix();
}
tex_format
}
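    /// Uploads `data` into a single mip level, either through a mapped staging buffer and
    /// `copy_buffer_to_texture`, or directly via `Queue::write_texture`.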
#[allow(clippy::missing_errors_doc)]
pub async fn upload_single_mip(
texture: &wgpu::Texture,
device: &wgpu::Device,
queue: &wgpu::Queue,
desc: &wgpu::TextureDescriptor<'_>,
data: &[u8],
staging_buffer: Option<&Buffer>,
mip: u32,
) -> Result<(), Box<dyn std::error::Error>> {
let mut mip_size = desc.mip_level_size(mip).unwrap();
if desc.dimension != wgpu::TextureDimension::D3 {
mip_size.depth_or_array_layers = 1;
}
let block_size = desc.format.block_copy_size(None).unwrap_or(4);
let (block_width, block_height) = desc.format.block_dimensions();
let mip_physical = mip_size.physical_size(desc.format);
let width_blocks = mip_physical.width / block_width;
let height_blocks = mip_physical.height / block_height;
let bytes_per_row = width_blocks * block_size;
if let Some(staging_buffer) = staging_buffer {
warn!("Using slow CPU->GPU transfer for texture upload. Might use less memory that staging buffer using by wgpu but it will be slower.");
let bytes_per_row_unpadded = texture.format().block_copy_size(None).unwrap() * mip_size.width;
let bytes_per_row_padded = numerical::align(bytes_per_row_unpadded, wgpu::COPY_BYTES_PER_ROW_ALIGNMENT);
let slice_size = numerical::align(u32::try_from(data.len()).unwrap(), u32::try_from(wgpu::COPY_BUFFER_ALIGNMENT).unwrap());
{
let buffer_slice = staging_buffer.buffer.slice(0..u64::from(slice_size));
let (tx, rx) = futures::channel::oneshot::channel();
buffer_slice.map_async(wgpu::MapMode::Write, move |result| {
tx.send(result).unwrap();
});
let _ = device.poll(wgpu::PollType::Wait);
rx.await.unwrap()?;
let mut buf_data = buffer_slice.get_mapped_range_mut();
buf_data.get_mut(0..data.len()).unwrap().clone_from_slice(data);
}
staging_buffer.buffer.unmap();
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
encoder.copy_buffer_to_texture(
wgpu::TexelCopyBufferInfo {
buffer: &staging_buffer.buffer,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(bytes_per_row_padded),
rows_per_image: Some(mip_size.height),
},
},
wgpu::TexelCopyTextureInfo {
aspect: wgpu::TextureAspect::All,
texture,
mip_level: mip,
origin: wgpu::Origin3d::ZERO,
},
wgpu::Extent3d {
width: mip_size.width,
height: mip_size.height,
depth_or_array_layers: 1,
},
);
queue.submit(Some(encoder.finish()));
} else {
queue.write_texture(
wgpu::TexelCopyTextureInfo {
texture,
mip_level: mip,
origin: wgpu::Origin3d { x: 0, y: 0, z: 0 },
aspect: wgpu::TextureAspect::All,
},
data,
wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(bytes_per_row),
rows_per_image: Some(height_blocks),
},
mip_physical,
);
}
Ok(())
}
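    /// Uploads a tightly packed byte blob containing all layers and mip levels
    /// (layer-major), advancing a running offset per mip. Each mip goes through the same
    /// staging-buffer or `write_texture` path as [`Self::upload_single_mip`], but this
    /// variant blocks instead of awaiting.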
pub fn upload_all_mips(
texture: &wgpu::Texture,
device: &wgpu::Device,
queue: &wgpu::Queue,
desc: &wgpu::TextureDescriptor,
data: &[u8],
staging_buffer: Option<&Buffer>,
) {
let block_size = desc.format.block_copy_size(None).unwrap_or(4);
let (block_width, block_height) = desc.format.block_dimensions();
let layer_iterations = desc.array_layer_count();
let (min_mip, max_mip) = (0, desc.mip_level_count);
let mut binary_offset = 0;
for layer in 0..layer_iterations {
for mip in min_mip..max_mip {
let mut mip_size = desc.mip_level_size(mip).unwrap();
if desc.dimension != wgpu::TextureDimension::D3 {
mip_size.depth_or_array_layers = 1;
}
let mip_physical = mip_size.physical_size(desc.format);
let width_blocks = mip_physical.width / block_width;
let height_blocks = mip_physical.height / block_height;
let bytes_per_row = width_blocks * block_size;
let data_size = bytes_per_row * height_blocks * mip_size.depth_or_array_layers;
let end_offset = binary_offset + data_size as usize;
if let Some(staging_buffer) = staging_buffer {
warn!("Using slow CPU->GPU transfer for texture upload. Might use less memory that staging buffer using by wgpu but it will be slower.");
let bytes_per_row_unpadded = texture.format().block_copy_size(None).unwrap() * mip_size.width;
let bytes_per_row_padded = numerical::align(bytes_per_row_unpadded, wgpu::COPY_BYTES_PER_ROW_ALIGNMENT);
let data_to_copy = &data[binary_offset..end_offset];
let slice_size = numerical::align(
u32::try_from(data_to_copy.len()).unwrap(),
u32::try_from(wgpu::COPY_BUFFER_ALIGNMENT).unwrap(),
);
{
let buffer_slice = staging_buffer.buffer.slice(0..u64::from(slice_size));
let (tx, rx) = futures::channel::oneshot::channel();
buffer_slice.map_async(wgpu::MapMode::Write, move |result| {
tx.send(result).unwrap();
});
let _ = device.poll(wgpu::PollType::Wait);
rx.block_on().unwrap().unwrap();
let mut buf_data = buffer_slice.get_mapped_range_mut();
buf_data.get_mut(0..data_to_copy.len()).unwrap().clone_from_slice(data_to_copy);
}
staging_buffer.buffer.unmap();
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
encoder.copy_buffer_to_texture(
wgpu::TexelCopyBufferInfo {
buffer: &staging_buffer.buffer,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(bytes_per_row_padded),
rows_per_image: Some(mip_size.height),
},
},
wgpu::TexelCopyTextureInfo {
aspect: wgpu::TextureAspect::All,
texture,
mip_level: mip,
origin: wgpu::Origin3d::ZERO,
},
wgpu::Extent3d {
width: mip_size.width,
height: mip_size.height,
depth_or_array_layers: 1,
},
);
queue.submit(Some(encoder.finish()));
} else {
queue.write_texture(
wgpu::TexelCopyTextureInfo {
texture,
mip_level: mip,
origin: wgpu::Origin3d { x: 0, y: 0, z: layer },
aspect: wgpu::TextureAspect::All,
},
&data[binary_offset..end_offset],
wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(bytes_per_row),
rows_per_image: Some(height_blocks),
},
mip_physical,
);
}
binary_offset = end_offset;
}
}
}
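    /// Copies `data` into the provided staging buffer (mapping it for write and blocking
    /// until the map completes), then records a `copy_buffer_to_texture` into the given
    /// mip level. `data` is expected to already have rows padded to
    /// `COPY_BYTES_PER_ROW_ALIGNMENT`, and its length must satisfy wgpu's buffer-mapping
    /// alignment.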
pub fn upload_from_cpu_with_staging_buffer(
texture: &wgpu::Texture,
device: &wgpu::Device,
queue: &wgpu::Queue,
desc: &wgpu::TextureDescriptor,
data: &[u8],
staging_buffer: &Buffer,
mip_lvl: u32,
) {
let mip_size = desc.mip_level_size(mip_lvl).unwrap();
{
let buffer_slice = staging_buffer.buffer.slice(0..data.len() as u64);
let (tx, rx) = futures::channel::oneshot::channel();
buffer_slice.map_async(wgpu::MapMode::Write, move |result| {
tx.send(result).unwrap();
});
let _ = device.poll(wgpu::PollType::Wait);
rx.block_on().unwrap().unwrap();
let mut buf_data = buffer_slice.get_mapped_range_mut();
buf_data.clone_from_slice(data);
}
staging_buffer.buffer.unmap();
let bytes_per_row_unpadded = texture.format().block_copy_size(None).unwrap() * mip_size.width;
let bytes_per_row_padded = numerical::align(bytes_per_row_unpadded, wgpu::COPY_BYTES_PER_ROW_ALIGNMENT);
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
encoder.copy_buffer_to_texture(
wgpu::TexelCopyBufferInfo {
buffer: &staging_buffer.buffer,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(bytes_per_row_padded),
rows_per_image: Some(mip_size.height),
},
},
wgpu::TexelCopyTextureInfo {
aspect: wgpu::TextureAspect::All,
texture,
mip_level: mip_lvl,
origin: wgpu::Origin3d::ZERO,
},
wgpu::Extent3d {
width: mip_size.width,
height: mip_size.height,
depth_or_array_layers: 1,
},
);
queue.submit(Some(encoder.finish()));
let _ = device.poll(wgpu::PollType::Wait);
}
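    /// Reads mip level 0 back to the CPU as a [`DynImage`], going through a `MAP_READ`
    /// buffer with rows padded to `COPY_BYTES_PER_ROW_ALIGNMENT` (the padding is stripped
    /// before the image is built). BGRA textures are swizzled to RGBA on the way out.
    ///
    /// A minimal usage sketch (illustrative only; assumes an initialized `device` and
    /// `queue`):
    ///
    /// ```ignore
    /// let img = pollster::block_on(tex.download_to_cpu(&device, &queue, wgpu::TextureAspect::All));
    /// ```
    ///
    /// # Panics
    /// Panics if the texture format is not one of the implemented formats.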
pub async fn download_to_cpu(&self, device: &wgpu::Device, queue: &wgpu::Queue, aspect: wgpu::TextureAspect) -> DynImage {
let bytes_per_row_unpadded = self.texture.format().block_copy_size(None).unwrap_or(4) * self.width();
let bytes_per_row_padded = numerical::align(bytes_per_row_unpadded, wgpu::COPY_BYTES_PER_ROW_ALIGNMENT);
let output_buffer_size = u64::from(bytes_per_row_padded * self.height());
let output_buffer_desc = wgpu::BufferDescriptor {
size: output_buffer_size,
            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
label: None,
mapped_at_creation: false,
};
let output_buffer = device.create_buffer(&output_buffer_desc);
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
encoder.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo {
aspect,
texture: &self.texture,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
},
wgpu::TexelCopyBufferInfo {
buffer: &output_buffer,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(bytes_per_row_padded),
rows_per_image: Some(self.height()),
},
},
wgpu::Extent3d {
width: self.width(),
height: self.height(),
depth_or_array_layers: 1,
},
);
queue.submit(Some(encoder.finish()));
let img: Option<DynImage> = {
let buffer_slice = output_buffer.slice(..);
let (tx, rx) = futures_intrusive::channel::shared::oneshot_channel();
buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
tx.send(result).unwrap();
});
let _ = device.poll(wgpu::PollType::Wait);
rx.receive().await.unwrap().unwrap();
let data = buffer_slice.get_mapped_range();
let data_unpadded = Texture::remove_padding(data.as_bytes(), bytes_per_row_unpadded, bytes_per_row_padded, self.height());
let w = self.width();
let h = self.height();
match self.texture.format() {
TextureFormat::Rgba8Unorm => ImageBuffer::from_raw(w, h, data_unpadded.to_vec()).map(DynImage::ImageRgba8),
TextureFormat::Bgra8Unorm => {
let bgra_data = data_unpadded.to_vec();
let mut rgba_data = bgra_data.clone();
                    for chunk in rgba_data.chunks_exact_mut(4) {
                        chunk.swap(0, 2); // swizzle BGRA -> RGBA in place
                    }
ImageBuffer::from_raw(w, h, rgba_data).map(DynImage::ImageRgba8)
}
TextureFormat::Rgba32Float => ImageBuffer::from_raw(w, h, numerical::u8_to_f32_vec(&data_unpadded)).map(DynImage::ImageRgba32F),
TextureFormat::Depth32Float | TextureFormat::Depth32FloatStencil8 => {
ImageBuffer::from_raw(w, h, numerical::u8_to_f32_vec(&data_unpadded)).map(DynImage::ImageLuma32F)
}
x => panic!("Texture format not implemented! {x:?}"),
}
};
output_buffer.unmap();
img.unwrap()
}
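    /// Reads a single pixel at `(x, y)` back to the CPU. Currently only `Rgba8Unorm` is
    /// supported, and only the first byte (the red channel) is returned, as a 1x1 luma
    /// image.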
pub async fn download_pixel_to_cpu(&self, device: &wgpu::Device, queue: &wgpu::Queue, aspect: wgpu::TextureAspect, x: u32, y: u32) -> DynImage {
let output_buffer_desc = wgpu::BufferDescriptor {
label: Some("ID Readback Buffer"),
            size: 4,
            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
mapped_at_creation: false,
};
let output_buffer = device.create_buffer(&output_buffer_desc);
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("ID Readback Encoder"),
});
encoder.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo {
aspect,
texture: &self.texture,
mip_level: 0,
origin: wgpu::Origin3d { x, y, z: 0 },
},
wgpu::TexelCopyBufferInfo {
buffer: &output_buffer,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: None,
rows_per_image: None,
},
},
wgpu::Extent3d {
width: 1,
height: 1,
depth_or_array_layers: 1,
},
);
queue.submit(Some(encoder.finish()));
let pixel: Option<DynImage> = {
let buffer_slice = output_buffer.slice(..);
let (tx, rx) = futures_intrusive::channel::shared::oneshot_channel();
buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
tx.send(result).unwrap();
});
let _ = device.poll(wgpu::PollType::Wait);
rx.receive().await.unwrap().unwrap();
let data = buffer_slice.get_mapped_range();
match self.texture.format() {
TextureFormat::Rgba8Unorm => {
                    // Only the first byte (the red channel) is read back, as a 1x1 luma image.
                    let first_byte = data[0];
                    ImageBuffer::from_raw(1, 1, vec![first_byte]).map(DynImage::ImageLuma8)
}
x => panic!("Texture format not implemented! {x:?}"),
}
};
output_buffer.unmap();
pixel.unwrap()
}
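    /// Strips per-row padding from a readback buffer. Returns the input unchanged
    /// (borrowed) when the padded and unpadded row sizes already match.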
pub fn remove_padding(buffer: &[u8], bytes_per_row_unpadded: u32, bytes_per_row_padded: u32, nr_rows: u32) -> Cow<'_, [u8]> {
if bytes_per_row_padded == bytes_per_row_unpadded {
return Cow::Borrowed(buffer);
}
let mut unpadded_buffer = Vec::with_capacity((bytes_per_row_unpadded * nr_rows) as _);
for row in 0..nr_rows {
let offset = (bytes_per_row_padded * row) as usize;
unpadded_buffer.extend_from_slice(&buffer[offset..(offset + bytes_per_row_unpadded as usize)]);
}
unpadded_buffer.into()
}
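    /// Creates a bind group layout with a filterable 2D float texture at `binding_tex`
    /// and a filtering sampler at `binding_sampler`, both visible to the fragment stage.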
pub fn create_bind_group_layout(device: &wgpu::Device, binding_tex: u32, binding_sampler: u32) -> wgpu::BindGroupLayout {
device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
entries: &[
wgpu::BindGroupLayoutEntry {
                    binding: binding_tex,
                    visibility: wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Texture {
multisampled: false,
view_dimension: wgpu::TextureViewDimension::D2,
sample_type: wgpu::TextureSampleType::Float { filterable: true },
},
count: None,
},
wgpu::BindGroupLayoutEntry {
                    binding: binding_sampler,
                    visibility: wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
count: None,
},
],
label: Some("texture_bind_group_layout"),
})
}
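    /// Downloads a depth texture and converts its reverse-Z NDC depth values to linear
    /// depth in the `[near, far]` range.
    ///
    /// # Panics
    /// Panics for multisampled `Depth32Float` textures and for non-depth formats.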
#[must_use]
pub fn depth_linearize(&self, device: &wgpu::Device, queue: &wgpu::Queue, near: f32, far: f32) -> DynImage {
assert!(
!(self.texture.sample_count() > 1 && self.texture.format() == TextureFormat::Depth32Float),
"InvalidSampleCount: Depth maps not supported for MSAA sample count {} (Use a config to set msaa_nr_samples as 1)",
self.texture.sample_count()
);
let aspect = wgpu::TextureAspect::DepthOnly;
let dynamic_img = pollster::block_on(self.download_to_cpu(device, queue, aspect));
let w = dynamic_img.width();
let h = dynamic_img.height();
let c = dynamic_img.channels();
assert!(c == 1, "Depth maps should have only 1 channel");
        match dynamic_img {
DynImage::ImageLuma32F(v) => {
let img_vec_ndc = v.to_vec();
let img_vec: Vec<f32> = img_vec_ndc.iter().map(|&x| numerical::linearize_depth_reverse_z(x, near, far)).collect();
DynImage::ImageLuma32F(ImageBuffer::from_raw(w, h, img_vec).unwrap())
}
_ => panic!("Texture format not implemented for remap (Only for depths)!"),
};
linearized_img
}
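    /// Creates a bind group for this texture's view and sampler, using a freshly created
    /// layout from [`Self::create_bind_group_layout`].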
pub fn create_bind_group(&self, device: &wgpu::Device, binding_tex: u32, binding_sampler: u32) -> wgpu::BindGroup {
        device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &Self::create_bind_group_layout(device, binding_tex, binding_sampler),
entries: &[
wgpu::BindGroupEntry {
binding: binding_tex,
resource: wgpu::BindingResource::TextureView(&self.view),
},
wgpu::BindGroupEntry {
binding: binding_sampler,
resource: wgpu::BindingResource::Sampler(&self.sampler),
},
],
label: Some("bind_group"),
        })
}
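    /// Replaces this texture with a newly allocated one of the given size, keeping the
    /// format, usage, and `tex_params`. The old contents are not copied over.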
pub fn resize(&mut self, device: &wgpu::Device, width: u32, height: u32) {
let format = self.texture.format();
let usage = self.texture.usage();
let mut new = Self::new(device, width, height, format, usage, self.tex_params);
std::mem::swap(self, &mut new);
}
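    /// Creates a 4x4 `Rgba8UnormSrgb` placeholder texture filled with `[255, 0, 0, 0]`
    /// texels (red with zero alpha).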
pub fn create_default_texture(device: &wgpu::Device, queue: &wgpu::Queue) -> Self {
let width = 4;
let height = 4;
let mut img_data: Vec<u8> = Vec::new();
for _ in 0..height {
for _ in 0..width {
img_data.push(255);
img_data.push(0);
img_data.push(0);
img_data.push(0);
}
}
let size = wgpu::Extent3d {
width,
height,
depth_or_array_layers: 1,
};
let format = wgpu::TextureFormat::Rgba8UnormSrgb;
let desc = wgpu::TextureDescriptor {
label: None,
size,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format,
usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
view_formats: if cfg!(target_arch = "wasm32") {
&[]
} else {
&[format.add_srgb_suffix(), format.remove_srgb_suffix()]
},
};
let tex_params = TexParams::from_desc(&desc);
let texture = device.create_texture_with_data(queue, &desc, wgpu::util::TextureDataOrder::LayerMajor, img_data.as_slice());
let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
..Default::default()
});
Self {
texture,
view,
sampler,
tex_params,
#[cfg(feature = "burn-torch")]
staging_buffer_backed_by_cuda_mem: None,
}
}
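    /// Creates a 4x4 placeholder cubemap (6 layers with a cube view) filled with the same
    /// `[255, 0, 0, 0]` texels as [`Self::create_default_texture`].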
pub fn create_default_cubemap(device: &wgpu::Device, queue: &wgpu::Queue) -> Self {
let width = 4;
let height = 4;
let mut img_data: Vec<u8> = Vec::new();
for _ in 0..6 {
for _ in 0..height {
for _ in 0..width {
img_data.push(255);
img_data.push(0);
img_data.push(0);
img_data.push(0);
}
}
}
let size = wgpu::Extent3d {
width,
height,
depth_or_array_layers: 6,
};
let format = wgpu::TextureFormat::Rgba8UnormSrgb;
let desc = wgpu::TextureDescriptor {
label: None,
size,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format,
usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
view_formats: if cfg!(target_arch = "wasm32") {
&[]
} else {
&[format.add_srgb_suffix(), format.remove_srgb_suffix()]
},
};
let tex_params = TexParams::from_desc(&desc);
let texture = device.create_texture_with_data(queue, &desc, wgpu::util::TextureDataOrder::LayerMajor, img_data.as_slice());
let view = texture.create_view(&wgpu::TextureViewDescriptor {
dimension: Some(wgpu::TextureViewDimension::Cube),
..Default::default()
});
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
mipmap_filter: wgpu::FilterMode::Linear,
..Default::default()
});
Self {
texture,
view,
sampler,
tex_params,
#[cfg(feature = "burn-torch")]
staging_buffer_backed_by_cuda_mem: None,
}
}
pub fn width(&self) -> u32 {
self.texture.width()
}
pub fn height(&self) -> u32 {
self.texture.height()
}
pub fn extent(&self) -> wgpu::Extent3d {
wgpu::Extent3d {
width: self.width(),
height: self.height(),
depth_or_array_layers: 1,
}
}
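    /// Uploads a CUDA-resident `tch` tensor of shape `[1, C, H, W]` and dtype `u8` into
    /// this texture through a CUDA-backed wgpu staging buffer, recreating the staging
    /// buffer and the texture whenever the size or channel count changes. RGB tensors are
    /// padded with a zeroed alpha channel.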
#[cfg(feature = "burn-torch")]
pub fn from_tensor(
&mut self,
tensor: &Tensor,
device: &wgpu::Device,
queue: &wgpu::Queue,
adapter: &wgpu::Adapter,
) -> Result<(), CudaInteropError> {
if tensor.dim() != 4 {
return Err(CudaInteropError::InvalidTensorDim(tensor.dim() as usize));
}
if tensor.size()[0] != 1 {
return Err(CudaInteropError::InvalidBatchSize(tensor.size()[0] as usize));
}
if tensor.kind() != tch::Kind::Uint8 {
return Err(CudaInteropError::InvalidTensorType(tensor.kind()));
}
let mut tensor_hwc: Tensor = tensor.permute([0, 2, 3, 1]).squeeze().contiguous();
let nr_channels = tensor_hwc.size()[2] as usize;
if nr_channels > 4 {
return Err(CudaInteropError::InvalidChannelSize(nr_channels));
}
        if nr_channels == 3 {
            // Pad RGB to RGBA with an explicitly zeroed alpha channel (`empty_like` would
            // leave the added channel uninitialized).
            let zero_channel = Tensor::zeros_like(&tensor_hwc.slice(2, 0, 1, 1));
            tensor_hwc = Tensor::cat(&[tensor_hwc, zero_channel], 2);
        }
let nr_channels = tensor_hwc.size()[2] as usize;
let height = tensor_hwc.size()[0] as usize;
let width = tensor_hwc.size()[1] as usize;
        let bytes_per_channel = 1;
        let img_size = AllocSize {
            height,
            width,
            stride: width * nr_channels * bytes_per_channel,
        };
if self.staging_buffer_backed_by_cuda_mem.is_none()
|| self.staging_buffer_backed_by_cuda_mem.as_ref().unwrap().cuda_mem.alloc_size != img_size
{
debug!("staging_buffer_backed_by_cuda_mem creating because it is none or the size is different");
let wgpu_cuda = wgpu_cuda_interop::interop::create_wgpu_cuda_buffer(device, adapter, img_size, wgpu::BufferUsages::COPY_SRC);
self.staging_buffer_backed_by_cuda_mem = Some(Arc::new(wgpu_cuda));
}
if self.texture.height() != height as u32 || self.texture.width() != width as u32 || self.nr_channels() != nr_channels as u32 {
let new_format = match nr_channels {
1 => wgpu::TextureFormat::R8Unorm,
2 => wgpu::TextureFormat::Rg8Unorm,
4 => wgpu::TextureFormat::Rgba8UnormSrgb,
_ => panic!("Unsupported number of channels"),
};
let new_tex = Texture::new(device, width as u32, height as u32, new_format, self.texture.usage(), self.tex_params);
self.texture = new_tex.texture;
self.view = new_tex.view;
}
let source_ptr = tensor_hwc.data_ptr() as cust_raw::CUdeviceptr;
if let Some(staging_buffer) = self.staging_buffer_backed_by_cuda_mem.as_ref() {
wgpu_cuda_interop::interop::cuda_img_to_wgpu(source_ptr, img_size, staging_buffer, &self.texture, device, queue);
}
Ok(())
}
}