use bytemuck::{Pod, Zeroable};
use std::borrow::Cow;
use wgpu::util::DeviceExt;
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod, Zeroable)]
pub struct GpuTracerConfig {
pub detector_c_bins: u32,
pub detector_g_bins: u32,
pub detector_c_res: f32,
pub detector_g_res: f32,
pub seed_offset: u32,
pub num_photons: u32,
pub source_type: u32,
pub source_flux: f32,
pub num_primitives: u32,
pub max_bounces: u32,
pub rr_threshold: f32,
pub cdf_g_steps: u32,
pub cdf_c_steps: u32,
pub cdf_g_max: f32,
pub _align_pad0: u32,
pub _align_pad1: u32,
pub area_center: [f32; 3],
pub _pad0: f32,
pub area_normal: [f32; 3],
pub _pad1: f32,
pub area_u_axis: [f32; 3],
pub area_half_width: f32,
pub area_half_height: f32,
pub _pad2: u32,
pub _pad3: u32,
pub _pad4: u32,
}
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod, Zeroable)]
pub struct GpuPrimitive {
pub ptype: u32,
pub material_id: u32,
pub _pad0: u32,
pub _pad1: u32,
pub params: [f32; 12],
}
impl GpuPrimitive {
pub fn sheet(
center: [f32; 3],
normal: [f32; 3],
u_axis: [f32; 3],
half_width: f32,
half_height: f32,
thickness: f32,
material_id: u32,
) -> Self {
Self {
ptype: 0, material_id,
_pad0: 0,
_pad1: 0,
params: [
center[0],
center[1],
center[2],
normal[0],
normal[1],
normal[2],
u_axis[0],
u_axis[1],
u_axis[2],
half_width,
half_height,
thickness,
],
}
}
}
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod, Zeroable)]
pub struct GpuMaterial {
pub mtype: u32,
pub _pad0: u32,
pub _pad1: u32,
pub _pad2: u32,
pub reflectance: f32,
pub ior: f32,
pub transmittance: f32,
pub min_reflectance: f32,
pub absorption_coeff: f32,
pub scattering_coeff: f32,
pub asymmetry: f32,
pub thickness: f32,
}
impl GpuMaterial {
pub fn from_material_params(params: &eulumdat_goniosim::MaterialParams) -> Self {
use eulumdat_goniosim::Material;
let mat = params.to_material();
match mat {
Material::Absorber => Self {
mtype: 0,
reflectance: 0.0,
ior: 1.0,
transmittance: 0.0,
min_reflectance: 0.0,
absorption_coeff: 0.0,
scattering_coeff: 0.0,
asymmetry: 0.0,
thickness: 0.0,
_pad0: 0,
_pad1: 0,
_pad2: 0,
},
Material::DiffuseReflector { reflectance } => Self {
mtype: 1,
reflectance: reflectance as f32,
ior: 1.0,
transmittance: 0.0,
min_reflectance: 0.0,
absorption_coeff: 0.0,
scattering_coeff: 0.0,
asymmetry: 0.0,
thickness: 0.0,
_pad0: 0,
_pad1: 0,
_pad2: 0,
},
Material::SpecularReflector { reflectance } => Self {
mtype: 2,
reflectance: reflectance as f32,
ior: 1.0,
transmittance: 0.0,
min_reflectance: 0.0,
absorption_coeff: 0.0,
scattering_coeff: 0.0,
asymmetry: 0.0,
thickness: 0.0,
_pad0: 0,
_pad1: 0,
_pad2: 0,
},
Material::ClearTransmitter {
ior,
transmittance,
min_reflectance,
} => Self {
mtype: 4,
reflectance: 0.0,
ior: ior as f32,
transmittance: transmittance as f32,
min_reflectance: min_reflectance as f32,
absorption_coeff: 0.0,
scattering_coeff: 0.0,
asymmetry: 0.0,
thickness: 0.0,
_pad0: 0,
_pad1: 0,
_pad2: 0,
},
Material::DiffuseTransmitter {
ior,
scattering_coeff,
absorption_coeff,
asymmetry,
thickness,
min_reflectance,
} => Self {
mtype: 5,
reflectance: 0.0,
ior: ior as f32,
transmittance: 0.0,
min_reflectance: min_reflectance as f32,
absorption_coeff: absorption_coeff as f32,
scattering_coeff: scattering_coeff as f32,
asymmetry: asymmetry as f32,
thickness: thickness as f32,
_pad0: 0,
_pad1: 0,
_pad2: 0,
},
Material::MixedReflector {
reflectance,
specular_fraction: _,
} => Self {
mtype: 3,
reflectance: reflectance as f32,
ior: 1.0,
transmittance: 0.0,
min_reflectance: 0.0,
absorption_coeff: 0.0,
scattering_coeff: 0.0,
asymmetry: 0.0,
thickness: 0.0,
_pad0: 0,
_pad1: 0,
_pad2: 0,
},
}
}
}
#[derive(Clone, Copy, Debug)]
pub enum SourceType {
Isotropic = 0,
Lambertian = 1,
FromLvk = 2,
AreaSource = 3,
}
pub struct GpuDetectorResult {
bins: Vec<Vec<f64>>,
num_c: usize,
num_g: usize,
c_res: f64,
g_res: f64,
}
impl GpuDetectorResult {
pub fn total_energy(&self) -> f64 {
self.bins.iter().flat_map(|row| row.iter()).sum()
}
pub fn bins(&self) -> &Vec<Vec<f64>> {
&self.bins
}
pub fn num_c(&self) -> usize {
self.num_c
}
pub fn num_g(&self) -> usize {
self.num_g
}
pub fn to_candela(&self, source_flux_lm: f64) -> Vec<Vec<f64>> {
let total = self.total_energy();
if total <= 0.0 {
return self.bins.clone();
}
let flux_per_energy = source_flux_lm / total;
let dc_rad = self.c_res.to_radians();
let mut candela = vec![vec![0.0; self.num_g]; self.num_c];
#[allow(clippy::needless_range_loop)]
for ci in 0..self.num_c {
for gi in 0..self.num_g {
let g_rad = (gi as f64 * self.g_res).to_radians();
let g_lo = (g_rad - self.g_res.to_radians() / 2.0).max(0.0);
let g_hi = (g_rad + self.g_res.to_radians() / 2.0).min(std::f64::consts::PI);
let solid_angle = dc_rad * (g_lo.cos() - g_hi.cos()).abs();
if solid_angle > 0.0 {
candela[ci][gi] = self.bins[ci][gi] * flux_per_energy / solid_angle;
}
}
}
candela
}
}
pub struct GpuTracer {
device: wgpu::Device,
queue: wgpu::Queue,
pipeline: wgpu::ComputePipeline,
bind_group_layout: wgpu::BindGroupLayout,
}
impl GpuTracer {
pub async fn new() -> Result<Self, String> {
let instance = wgpu::Instance::default();
let adapter = instance
.request_adapter(&wgpu::RequestAdapterOptions {
power_preference: wgpu::PowerPreference::HighPerformance,
compatible_surface: None,
force_fallback_adapter: false,
})
.await
.map_err(|e| format!("No GPU adapter found: {e}"))?;
log::info!("GPU adapter: {:?}", adapter.get_info().name);
let (device, queue) = adapter
.request_device(&wgpu::DeviceDescriptor {
label: Some("eulumdat-rt"),
required_features: wgpu::Features::empty(),
required_limits: wgpu::Limits::default(),
..Default::default()
})
.await
.map_err(|e| format!("Failed to create device: {e}"))?;
let shader_source = include_str!("shaders/trace.wgsl");
let shader_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: Some("trace.wgsl"),
source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(shader_source)),
});
let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("rt_bind_group_layout"),
entries: &[
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: false },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Uniform,
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 2,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 3,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 4,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
],
});
let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("rt_pipeline_layout"),
bind_group_layouts: &[Some(&bind_group_layout)],
immediate_size: 0,
});
let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: Some("rt_trace_pipeline"),
layout: Some(&pipeline_layout),
module: &shader_module,
entry_point: Some("trace_photons"),
compilation_options: Default::default(),
cache: None,
});
Ok(Self {
device,
queue,
pipeline,
bind_group_layout,
})
}
pub async fn trace_isotropic(
&self,
num_photons: u32,
c_res_deg: f32,
g_res_deg: f32,
) -> GpuDetectorResult {
self.trace(
num_photons,
c_res_deg,
g_res_deg,
SourceType::Isotropic,
1000.0,
)
.await
}
pub async fn trace_lambertian(
&self,
num_photons: u32,
c_res_deg: f32,
g_res_deg: f32,
) -> GpuDetectorResult {
self.trace(
num_photons,
c_res_deg,
g_res_deg,
SourceType::Lambertian,
1000.0,
)
.await
}
#[allow(clippy::too_many_arguments)]
pub async fn trace_with_scene(
&self,
num_photons: u32,
c_res_deg: f32,
g_res_deg: f32,
source_type: SourceType,
source_flux: f32,
primitives: &[GpuPrimitive],
materials: &[GpuMaterial],
) -> GpuDetectorResult {
self.trace_inner(
num_photons,
c_res_deg,
g_res_deg,
source_type,
source_flux,
primitives,
materials,
&[],
0,
0,
0.0,
50,
0.01,
)
.await
}
#[allow(clippy::too_many_arguments)]
pub async fn trace_area_source(
&self,
num_photons: u32,
c_res_deg: f32,
g_res_deg: f32,
source_flux: f32,
center: [f32; 3],
normal: [f32; 3],
u_axis: [f32; 3],
half_width: f32,
half_height: f32,
) -> GpuDetectorResult {
let num_c = (360.0 / c_res_deg).round() as u32;
let num_g = (180.0 / g_res_deg).round() as u32 + 1;
let config = GpuTracerConfig {
detector_c_bins: num_c,
detector_g_bins: num_g,
detector_c_res: c_res_deg,
detector_g_res: g_res_deg,
seed_offset: 42,
num_photons,
source_type: SourceType::AreaSource as u32,
source_flux,
num_primitives: 0,
max_bounces: 50,
rr_threshold: 0.01,
cdf_g_steps: 0,
cdf_c_steps: 0,
cdf_g_max: 0.0,
_align_pad0: 0,
_align_pad1: 0,
area_center: center,
_pad0: 0.0,
area_normal: normal,
_pad1: 0.0,
area_u_axis: u_axis,
area_half_width: half_width,
area_half_height: half_height,
_pad2: 0,
_pad3: 0,
_pad4: 0,
};
self.dispatch_config(config, num_c, num_g, &[], &[], &[])
.await
}
#[allow(clippy::too_many_arguments)]
pub async fn trace_from_lvk(
&self,
num_photons: u32,
c_res_deg: f32,
g_res_deg: f32,
source_flux: f32,
cdf: &eulumdat_goniosim::source::LvkCdf,
primitives: &[GpuPrimitive],
materials: &[GpuMaterial],
) -> GpuDetectorResult {
let g_steps = cdf.g_steps;
let c_steps = cdf.c_steps;
let mut cdf_flat = Vec::with_capacity(g_steps + g_steps * c_steps);
for v in &cdf.marginal_g {
cdf_flat.push(*v as f32);
}
for row in &cdf.conditional_c {
for v in row {
cdf_flat.push(*v as f32);
}
}
self.trace_inner(
num_photons,
c_res_deg,
g_res_deg,
SourceType::FromLvk,
source_flux,
primitives,
materials,
&cdf_flat,
g_steps as u32,
c_steps as u32,
cdf.g_max as f32,
50,
0.01,
)
.await
}
async fn trace(
&self,
num_photons: u32,
c_res_deg: f32,
g_res_deg: f32,
source_type: SourceType,
source_flux: f32,
) -> GpuDetectorResult {
self.trace_inner(
num_photons,
c_res_deg,
g_res_deg,
source_type,
source_flux,
&[],
&[],
&[],
0,
0,
0.0,
1,
0.01,
)
.await
}
#[allow(clippy::too_many_arguments)]
async fn trace_inner(
&self,
num_photons: u32,
c_res_deg: f32,
g_res_deg: f32,
source_type: SourceType,
source_flux: f32,
primitives_data: &[GpuPrimitive],
materials_data: &[GpuMaterial],
cdf_data: &[f32],
cdf_g_steps: u32,
cdf_c_steps: u32,
cdf_g_max: f32,
max_bounces: u32,
rr_threshold: f32,
) -> GpuDetectorResult {
let num_c = (360.0 / c_res_deg).round() as u32;
let num_g = (180.0 / g_res_deg).round() as u32 + 1;
let config = GpuTracerConfig {
detector_c_bins: num_c,
detector_g_bins: num_g,
detector_c_res: c_res_deg,
detector_g_res: g_res_deg,
seed_offset: 42,
num_photons,
source_type: source_type as u32,
source_flux,
num_primitives: primitives_data.len() as u32,
max_bounces,
rr_threshold,
cdf_g_steps,
cdf_c_steps,
cdf_g_max,
_align_pad0: 0,
_align_pad1: 0,
area_center: [0.0; 3],
_pad0: 0.0,
area_normal: [0.0, 0.0, -1.0],
_pad1: 0.0,
area_u_axis: [1.0, 0.0, 0.0],
area_half_width: 0.0,
area_half_height: 0.0,
_pad2: 0,
_pad3: 0,
_pad4: 0,
};
self.dispatch_config(
config,
num_c,
num_g,
primitives_data,
materials_data,
cdf_data,
)
.await
}
async fn dispatch_config(
&self,
config: GpuTracerConfig,
num_c: u32,
num_g: u32,
primitives_data: &[GpuPrimitive],
materials_data: &[GpuMaterial],
cdf_data: &[f32],
) -> GpuDetectorResult {
let total_bins = num_c * num_g;
let num_photons = config.num_photons;
let c_res_deg = config.detector_c_res;
let g_res_deg = config.detector_g_res;
let config_buffer = self
.device
.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("config_buffer"),
contents: bytemuck::bytes_of(&config),
usage: wgpu::BufferUsages::UNIFORM,
});
let detector_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("detector_buffer"),
size: (total_bins as u64) * 4, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
});
let readback_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("readback_buffer"),
size: (total_bins as u64) * 4,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let dummy_prim = GpuPrimitive {
ptype: 0,
material_id: 0,
_pad0: 0,
_pad1: 0,
params: [0.0; 12],
};
let dummy_mat = GpuMaterial {
mtype: 0,
_pad0: 0,
_pad1: 0,
_pad2: 0,
reflectance: 0.0,
ior: 1.0,
transmittance: 0.0,
min_reflectance: 0.0,
absorption_coeff: 0.0,
scattering_coeff: 0.0,
asymmetry: 0.0,
thickness: 0.0,
};
let prim_buf_data: Vec<GpuPrimitive> = if primitives_data.is_empty() {
vec![dummy_prim]
} else {
primitives_data.to_vec()
};
let mat_buf_data: Vec<GpuMaterial> = if materials_data.is_empty() {
vec![dummy_mat]
} else {
materials_data.to_vec()
};
let primitives_buffer = self
.device
.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("primitives_buffer"),
contents: bytemuck::cast_slice(&prim_buf_data),
usage: wgpu::BufferUsages::STORAGE,
});
let materials_buffer = self
.device
.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("materials_buffer"),
contents: bytemuck::cast_slice(&mat_buf_data),
usage: wgpu::BufferUsages::STORAGE,
});
let cdf_buf_data: Vec<f32> = if cdf_data.is_empty() {
vec![0.0]
} else {
cdf_data.to_vec()
};
let cdf_buffer = self
.device
.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("cdf_buffer"),
contents: bytemuck::cast_slice(&cdf_buf_data),
usage: wgpu::BufferUsages::STORAGE,
});
let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("rt_bind_group"),
layout: &self.bind_group_layout,
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: detector_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 1,
resource: config_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 2,
resource: primitives_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 3,
resource: materials_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 4,
resource: cdf_buffer.as_entire_binding(),
},
],
});
let workgroup_size = 256u32;
let num_workgroups = num_photons.div_ceil(workgroup_size);
let mut encoder = self
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("rt_encoder"),
});
{
let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
label: Some("rt_trace_pass"),
timestamp_writes: None,
});
pass.set_pipeline(&self.pipeline);
pass.set_bind_group(0, &bind_group, &[]);
pass.dispatch_workgroups(num_workgroups, 1, 1);
}
encoder.copy_buffer_to_buffer(
&detector_buffer,
0,
&readback_buffer,
0,
(total_bins as u64) * 4,
);
self.queue.submit(Some(encoder.finish()));
let buffer_slice = readback_buffer.slice(..);
let (tx, rx) = flume::bounded(1);
buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
tx.send(result).unwrap();
});
self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
rx.recv_async().await.unwrap().unwrap();
let data = buffer_slice.get_mapped_range();
let raw_bins: &[u32] = bytemuck::cast_slice(&data);
let mut bins = vec![vec![0.0f64; num_g as usize]; num_c as usize];
#[allow(clippy::needless_range_loop)]
for ci in 0..num_c as usize {
for gi in 0..num_g as usize {
let idx = ci * num_g as usize + gi;
bins[ci][gi] = raw_bins[idx] as f64 / 1_000.0;
}
}
drop(data);
readback_buffer.unmap();
GpuDetectorResult {
bins,
num_c: num_c as usize,
num_g: num_g as usize,
c_res: c_res_deg as f64,
g_res: g_res_deg as f64,
}
}
}