1use bytemuck::{Pod, Zeroable};
4use std::borrow::Cow;
5use wgpu::util::DeviceExt;
6
7#[repr(C)]
9#[derive(Clone, Copy, Debug, Pod, Zeroable)]
10pub struct GpuTracerConfig {
11 pub detector_c_bins: u32,
12 pub detector_g_bins: u32,
13 pub detector_c_res: f32,
14 pub detector_g_res: f32,
15 pub seed_offset: u32,
16 pub num_photons: u32,
17 pub source_type: u32,
18 pub source_flux: f32,
19 pub num_primitives: u32,
20 pub max_bounces: u32,
21 pub rr_threshold: f32,
22 pub cdf_g_steps: u32,
23 pub cdf_c_steps: u32,
24 pub cdf_g_max: f32,
25 pub _align_pad0: u32,
27 pub _align_pad1: u32,
28 pub area_center: [f32; 3],
30 pub _pad0: f32,
31 pub area_normal: [f32; 3],
32 pub _pad1: f32,
33 pub area_u_axis: [f32; 3],
34 pub area_half_width: f32,
35 pub area_half_height: f32,
36 pub _pad2: u32,
37 pub _pad3: u32,
38 pub _pad4: u32,
39}
40
41#[repr(C)]
43#[derive(Clone, Copy, Debug, Pod, Zeroable)]
44pub struct GpuPrimitive {
45 pub ptype: u32,
46 pub material_id: u32,
47 pub _pad0: u32,
48 pub _pad1: u32,
49 pub params: [f32; 12],
50}
51
52impl GpuPrimitive {
53 pub fn sheet(
55 center: [f32; 3],
56 normal: [f32; 3],
57 u_axis: [f32; 3],
58 half_width: f32,
59 half_height: f32,
60 thickness: f32,
61 material_id: u32,
62 ) -> Self {
63 Self {
64 ptype: 0, material_id,
66 _pad0: 0,
67 _pad1: 0,
68 params: [
69 center[0],
70 center[1],
71 center[2],
72 normal[0],
73 normal[1],
74 normal[2],
75 u_axis[0],
76 u_axis[1],
77 u_axis[2],
78 half_width,
79 half_height,
80 thickness,
81 ],
82 }
83 }
84}
85
86#[repr(C)]
88#[derive(Clone, Copy, Debug, Pod, Zeroable)]
89pub struct GpuMaterial {
90 pub mtype: u32,
91 pub _pad0: u32,
92 pub _pad1: u32,
93 pub _pad2: u32,
94 pub reflectance: f32,
95 pub ior: f32,
96 pub transmittance: f32,
97 pub min_reflectance: f32,
98 pub absorption_coeff: f32,
99 pub scattering_coeff: f32,
100 pub asymmetry: f32,
101 pub thickness: f32,
102}
103
104impl GpuMaterial {
105 pub fn from_material_params(params: &eulumdat_goniosim::MaterialParams) -> Self {
107 use eulumdat_goniosim::Material;
108 let mat = params.to_material();
109 match mat {
110 Material::Absorber => Self {
111 mtype: 0,
112 reflectance: 0.0,
113 ior: 1.0,
114 transmittance: 0.0,
115 min_reflectance: 0.0,
116 absorption_coeff: 0.0,
117 scattering_coeff: 0.0,
118 asymmetry: 0.0,
119 thickness: 0.0,
120 _pad0: 0,
121 _pad1: 0,
122 _pad2: 0,
123 },
124 Material::DiffuseReflector { reflectance } => Self {
125 mtype: 1,
126 reflectance: reflectance as f32,
127 ior: 1.0,
128 transmittance: 0.0,
129 min_reflectance: 0.0,
130 absorption_coeff: 0.0,
131 scattering_coeff: 0.0,
132 asymmetry: 0.0,
133 thickness: 0.0,
134 _pad0: 0,
135 _pad1: 0,
136 _pad2: 0,
137 },
138 Material::SpecularReflector { reflectance } => Self {
139 mtype: 2,
140 reflectance: reflectance as f32,
141 ior: 1.0,
142 transmittance: 0.0,
143 min_reflectance: 0.0,
144 absorption_coeff: 0.0,
145 scattering_coeff: 0.0,
146 asymmetry: 0.0,
147 thickness: 0.0,
148 _pad0: 0,
149 _pad1: 0,
150 _pad2: 0,
151 },
152 Material::ClearTransmitter {
153 ior,
154 transmittance,
155 min_reflectance,
156 } => Self {
157 mtype: 4,
158 reflectance: 0.0,
159 ior: ior as f32,
160 transmittance: transmittance as f32,
161 min_reflectance: min_reflectance as f32,
162 absorption_coeff: 0.0,
163 scattering_coeff: 0.0,
164 asymmetry: 0.0,
165 thickness: 0.0,
166 _pad0: 0,
167 _pad1: 0,
168 _pad2: 0,
169 },
170 Material::DiffuseTransmitter {
171 ior,
172 scattering_coeff,
173 absorption_coeff,
174 asymmetry,
175 thickness,
176 min_reflectance,
177 } => Self {
178 mtype: 5,
179 reflectance: 0.0,
180 ior: ior as f32,
181 transmittance: 0.0,
182 min_reflectance: min_reflectance as f32,
183 absorption_coeff: absorption_coeff as f32,
184 scattering_coeff: scattering_coeff as f32,
185 asymmetry: asymmetry as f32,
186 thickness: thickness as f32,
187 _pad0: 0,
188 _pad1: 0,
189 _pad2: 0,
190 },
191 Material::MixedReflector {
192 reflectance,
193 specular_fraction: _,
194 } => Self {
195 mtype: 3,
196 reflectance: reflectance as f32,
197 ior: 1.0,
198 transmittance: 0.0,
199 min_reflectance: 0.0,
200 absorption_coeff: 0.0,
201 scattering_coeff: 0.0,
202 asymmetry: 0.0,
203 thickness: 0.0,
204 _pad0: 0,
205 _pad1: 0,
206 _pad2: 0,
207 },
208 }
209 }
210}
211
212#[derive(Clone, Copy, Debug)]
214pub enum SourceType {
215 Isotropic = 0,
216 Lambertian = 1,
217 FromLvk = 2,
218 AreaSource = 3,
219}
220
221pub struct GpuDetectorResult {
223 bins: Vec<Vec<f64>>,
224 num_c: usize,
225 num_g: usize,
226 c_res: f64,
227 g_res: f64,
228}
229
230impl GpuDetectorResult {
231 pub fn total_energy(&self) -> f64 {
233 self.bins.iter().flat_map(|row| row.iter()).sum()
234 }
235
236 pub fn bins(&self) -> &Vec<Vec<f64>> {
238 &self.bins
239 }
240
241 pub fn num_c(&self) -> usize {
243 self.num_c
244 }
245
246 pub fn num_g(&self) -> usize {
248 self.num_g
249 }
250
251 pub fn to_candela(&self, source_flux_lm: f64) -> Vec<Vec<f64>> {
253 let total = self.total_energy();
254 if total <= 0.0 {
255 return self.bins.clone();
256 }
257
258 let flux_per_energy = source_flux_lm / total;
259 let dc_rad = self.c_res.to_radians();
260
261 let mut candela = vec![vec![0.0; self.num_g]; self.num_c];
262 #[allow(clippy::needless_range_loop)]
263 for ci in 0..self.num_c {
264 for gi in 0..self.num_g {
265 let g_rad = (gi as f64 * self.g_res).to_radians();
266 let g_lo = (g_rad - self.g_res.to_radians() / 2.0).max(0.0);
267 let g_hi = (g_rad + self.g_res.to_radians() / 2.0).min(std::f64::consts::PI);
268 let solid_angle = dc_rad * (g_lo.cos() - g_hi.cos()).abs();
269 if solid_angle > 0.0 {
270 candela[ci][gi] = self.bins[ci][gi] * flux_per_energy / solid_angle;
271 }
272 }
273 }
274 candela
275 }
276}
277
278pub struct GpuTracer {
280 device: wgpu::Device,
281 queue: wgpu::Queue,
282 pipeline: wgpu::ComputePipeline,
283 bind_group_layout: wgpu::BindGroupLayout,
284}
285
286impl GpuTracer {
287 pub async fn new() -> Result<Self, String> {
289 let instance = wgpu::Instance::default();
290
291 let adapter = instance
292 .request_adapter(&wgpu::RequestAdapterOptions {
293 power_preference: wgpu::PowerPreference::HighPerformance,
294 compatible_surface: None,
295 force_fallback_adapter: false,
296 })
297 .await
298 .map_err(|e| format!("No GPU adapter found: {e}"))?;
299
300 log::info!("GPU adapter: {:?}", adapter.get_info().name);
301
302 let (device, queue) = adapter
303 .request_device(&wgpu::DeviceDescriptor {
304 label: Some("eulumdat-rt"),
305 required_features: wgpu::Features::empty(),
306 required_limits: wgpu::Limits::default(),
307 ..Default::default()
308 })
309 .await
310 .map_err(|e| format!("Failed to create device: {e}"))?;
311
312 let shader_source = include_str!("shaders/trace.wgsl");
314 let shader_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
315 label: Some("trace.wgsl"),
316 source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(shader_source)),
317 });
318
319 let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
321 label: Some("rt_bind_group_layout"),
322 entries: &[
323 wgpu::BindGroupLayoutEntry {
325 binding: 0,
326 visibility: wgpu::ShaderStages::COMPUTE,
327 ty: wgpu::BindingType::Buffer {
328 ty: wgpu::BufferBindingType::Storage { read_only: false },
329 has_dynamic_offset: false,
330 min_binding_size: None,
331 },
332 count: None,
333 },
334 wgpu::BindGroupLayoutEntry {
336 binding: 1,
337 visibility: wgpu::ShaderStages::COMPUTE,
338 ty: wgpu::BindingType::Buffer {
339 ty: wgpu::BufferBindingType::Uniform,
340 has_dynamic_offset: false,
341 min_binding_size: None,
342 },
343 count: None,
344 },
345 wgpu::BindGroupLayoutEntry {
347 binding: 2,
348 visibility: wgpu::ShaderStages::COMPUTE,
349 ty: wgpu::BindingType::Buffer {
350 ty: wgpu::BufferBindingType::Storage { read_only: true },
351 has_dynamic_offset: false,
352 min_binding_size: None,
353 },
354 count: None,
355 },
356 wgpu::BindGroupLayoutEntry {
358 binding: 3,
359 visibility: wgpu::ShaderStages::COMPUTE,
360 ty: wgpu::BindingType::Buffer {
361 ty: wgpu::BufferBindingType::Storage { read_only: true },
362 has_dynamic_offset: false,
363 min_binding_size: None,
364 },
365 count: None,
366 },
367 wgpu::BindGroupLayoutEntry {
369 binding: 4,
370 visibility: wgpu::ShaderStages::COMPUTE,
371 ty: wgpu::BindingType::Buffer {
372 ty: wgpu::BufferBindingType::Storage { read_only: true },
373 has_dynamic_offset: false,
374 min_binding_size: None,
375 },
376 count: None,
377 },
378 ],
379 });
380
381 let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
382 label: Some("rt_pipeline_layout"),
383 bind_group_layouts: &[Some(&bind_group_layout)],
384 immediate_size: 0,
385 });
386
387 let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
388 label: Some("rt_trace_pipeline"),
389 layout: Some(&pipeline_layout),
390 module: &shader_module,
391 entry_point: Some("trace_photons"),
392 compilation_options: Default::default(),
393 cache: None,
394 });
395
396 Ok(Self {
397 device,
398 queue,
399 pipeline,
400 bind_group_layout,
401 })
402 }
403
404 pub async fn trace_isotropic(
406 &self,
407 num_photons: u32,
408 c_res_deg: f32,
409 g_res_deg: f32,
410 ) -> GpuDetectorResult {
411 self.trace(
412 num_photons,
413 c_res_deg,
414 g_res_deg,
415 SourceType::Isotropic,
416 1000.0,
417 )
418 .await
419 }
420
421 pub async fn trace_lambertian(
423 &self,
424 num_photons: u32,
425 c_res_deg: f32,
426 g_res_deg: f32,
427 ) -> GpuDetectorResult {
428 self.trace(
429 num_photons,
430 c_res_deg,
431 g_res_deg,
432 SourceType::Lambertian,
433 1000.0,
434 )
435 .await
436 }
437
438 #[allow(clippy::too_many_arguments)]
440 pub async fn trace_with_scene(
441 &self,
442 num_photons: u32,
443 c_res_deg: f32,
444 g_res_deg: f32,
445 source_type: SourceType,
446 source_flux: f32,
447 primitives: &[GpuPrimitive],
448 materials: &[GpuMaterial],
449 ) -> GpuDetectorResult {
450 self.trace_inner(
451 num_photons,
452 c_res_deg,
453 g_res_deg,
454 source_type,
455 source_flux,
456 primitives,
457 materials,
458 &[],
459 0,
460 0,
461 0.0,
462 50,
463 0.01,
464 )
465 .await
466 }
467
468 #[allow(clippy::too_many_arguments)]
470 pub async fn trace_area_source(
471 &self,
472 num_photons: u32,
473 c_res_deg: f32,
474 g_res_deg: f32,
475 source_flux: f32,
476 center: [f32; 3],
477 normal: [f32; 3],
478 u_axis: [f32; 3],
479 half_width: f32,
480 half_height: f32,
481 ) -> GpuDetectorResult {
482 let num_c = (360.0 / c_res_deg).round() as u32;
483 let num_g = (180.0 / g_res_deg).round() as u32 + 1;
484
485 let config = GpuTracerConfig {
486 detector_c_bins: num_c,
487 detector_g_bins: num_g,
488 detector_c_res: c_res_deg,
489 detector_g_res: g_res_deg,
490 seed_offset: 42,
491 num_photons,
492 source_type: SourceType::AreaSource as u32,
493 source_flux,
494 num_primitives: 0,
495 max_bounces: 50,
496 rr_threshold: 0.01,
497 cdf_g_steps: 0,
498 cdf_c_steps: 0,
499 cdf_g_max: 0.0,
500 _align_pad0: 0,
501 _align_pad1: 0,
502 area_center: center,
503 _pad0: 0.0,
504 area_normal: normal,
505 _pad1: 0.0,
506 area_u_axis: u_axis,
507 area_half_width: half_width,
508 area_half_height: half_height,
509 _pad2: 0,
510 _pad3: 0,
511 _pad4: 0,
512 };
513
514 self.dispatch_config(config, num_c, num_g, &[], &[], &[])
515 .await
516 }
517
518 #[allow(clippy::too_many_arguments)]
520 pub async fn trace_from_lvk(
521 &self,
522 num_photons: u32,
523 c_res_deg: f32,
524 g_res_deg: f32,
525 source_flux: f32,
526 cdf: &eulumdat_goniosim::source::LvkCdf,
527 primitives: &[GpuPrimitive],
528 materials: &[GpuMaterial],
529 ) -> GpuDetectorResult {
530 let g_steps = cdf.g_steps;
532 let c_steps = cdf.c_steps;
533 let mut cdf_flat = Vec::with_capacity(g_steps + g_steps * c_steps);
534 for v in &cdf.marginal_g {
536 cdf_flat.push(*v as f32);
537 }
538 for row in &cdf.conditional_c {
540 for v in row {
541 cdf_flat.push(*v as f32);
542 }
543 }
544
545 self.trace_inner(
546 num_photons,
547 c_res_deg,
548 g_res_deg,
549 SourceType::FromLvk,
550 source_flux,
551 primitives,
552 materials,
553 &cdf_flat,
554 g_steps as u32,
555 c_steps as u32,
556 cdf.g_max as f32,
557 50,
558 0.01,
559 )
560 .await
561 }
562
563 async fn trace(
565 &self,
566 num_photons: u32,
567 c_res_deg: f32,
568 g_res_deg: f32,
569 source_type: SourceType,
570 source_flux: f32,
571 ) -> GpuDetectorResult {
572 self.trace_inner(
573 num_photons,
574 c_res_deg,
575 g_res_deg,
576 source_type,
577 source_flux,
578 &[],
579 &[],
580 &[],
581 0,
582 0,
583 0.0,
584 1,
585 0.01,
586 )
587 .await
588 }
589
590 #[allow(clippy::too_many_arguments)]
591 async fn trace_inner(
592 &self,
593 num_photons: u32,
594 c_res_deg: f32,
595 g_res_deg: f32,
596 source_type: SourceType,
597 source_flux: f32,
598 primitives_data: &[GpuPrimitive],
599 materials_data: &[GpuMaterial],
600 cdf_data: &[f32],
601 cdf_g_steps: u32,
602 cdf_c_steps: u32,
603 cdf_g_max: f32,
604 max_bounces: u32,
605 rr_threshold: f32,
606 ) -> GpuDetectorResult {
607 let num_c = (360.0 / c_res_deg).round() as u32;
608 let num_g = (180.0 / g_res_deg).round() as u32 + 1;
609
610 let config = GpuTracerConfig {
611 detector_c_bins: num_c,
612 detector_g_bins: num_g,
613 detector_c_res: c_res_deg,
614 detector_g_res: g_res_deg,
615 seed_offset: 42,
616 num_photons,
617 source_type: source_type as u32,
618 source_flux,
619 num_primitives: primitives_data.len() as u32,
620 max_bounces,
621 rr_threshold,
622 cdf_g_steps,
623 cdf_c_steps,
624 cdf_g_max,
625 _align_pad0: 0,
626 _align_pad1: 0,
627 area_center: [0.0; 3],
628 _pad0: 0.0,
629 area_normal: [0.0, 0.0, -1.0],
630 _pad1: 0.0,
631 area_u_axis: [1.0, 0.0, 0.0],
632 area_half_width: 0.0,
633 area_half_height: 0.0,
634 _pad2: 0,
635 _pad3: 0,
636 _pad4: 0,
637 };
638
639 self.dispatch_config(
640 config,
641 num_c,
642 num_g,
643 primitives_data,
644 materials_data,
645 cdf_data,
646 )
647 .await
648 }
649
650 async fn dispatch_config(
652 &self,
653 config: GpuTracerConfig,
654 num_c: u32,
655 num_g: u32,
656 primitives_data: &[GpuPrimitive],
657 materials_data: &[GpuMaterial],
658 cdf_data: &[f32],
659 ) -> GpuDetectorResult {
660 let total_bins = num_c * num_g;
661 let num_photons = config.num_photons;
662 let c_res_deg = config.detector_c_res;
663 let g_res_deg = config.detector_g_res;
664
665 let config_buffer = self
666 .device
667 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
668 label: Some("config_buffer"),
669 contents: bytemuck::bytes_of(&config),
670 usage: wgpu::BufferUsages::UNIFORM,
671 });
672
673 let detector_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
675 label: Some("detector_buffer"),
676 size: (total_bins as u64) * 4, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
678 mapped_at_creation: false,
679 });
680
681 let readback_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
683 label: Some("readback_buffer"),
684 size: (total_bins as u64) * 4,
685 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
686 mapped_at_creation: false,
687 });
688
689 let dummy_prim = GpuPrimitive {
691 ptype: 0,
692 material_id: 0,
693 _pad0: 0,
694 _pad1: 0,
695 params: [0.0; 12],
696 };
697 let dummy_mat = GpuMaterial {
698 mtype: 0,
699 _pad0: 0,
700 _pad1: 0,
701 _pad2: 0,
702 reflectance: 0.0,
703 ior: 1.0,
704 transmittance: 0.0,
705 min_reflectance: 0.0,
706 absorption_coeff: 0.0,
707 scattering_coeff: 0.0,
708 asymmetry: 0.0,
709 thickness: 0.0,
710 };
711
712 let prim_buf_data: Vec<GpuPrimitive> = if primitives_data.is_empty() {
713 vec![dummy_prim]
714 } else {
715 primitives_data.to_vec()
716 };
717 let mat_buf_data: Vec<GpuMaterial> = if materials_data.is_empty() {
718 vec![dummy_mat]
719 } else {
720 materials_data.to_vec()
721 };
722
723 let primitives_buffer = self
724 .device
725 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
726 label: Some("primitives_buffer"),
727 contents: bytemuck::cast_slice(&prim_buf_data),
728 usage: wgpu::BufferUsages::STORAGE,
729 });
730
731 let materials_buffer = self
732 .device
733 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
734 label: Some("materials_buffer"),
735 contents: bytemuck::cast_slice(&mat_buf_data),
736 usage: wgpu::BufferUsages::STORAGE,
737 });
738
739 let cdf_buf_data: Vec<f32> = if cdf_data.is_empty() {
741 vec![0.0]
742 } else {
743 cdf_data.to_vec()
744 };
745 let cdf_buffer = self
746 .device
747 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
748 label: Some("cdf_buffer"),
749 contents: bytemuck::cast_slice(&cdf_buf_data),
750 usage: wgpu::BufferUsages::STORAGE,
751 });
752
753 let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
755 label: Some("rt_bind_group"),
756 layout: &self.bind_group_layout,
757 entries: &[
758 wgpu::BindGroupEntry {
759 binding: 0,
760 resource: detector_buffer.as_entire_binding(),
761 },
762 wgpu::BindGroupEntry {
763 binding: 1,
764 resource: config_buffer.as_entire_binding(),
765 },
766 wgpu::BindGroupEntry {
767 binding: 2,
768 resource: primitives_buffer.as_entire_binding(),
769 },
770 wgpu::BindGroupEntry {
771 binding: 3,
772 resource: materials_buffer.as_entire_binding(),
773 },
774 wgpu::BindGroupEntry {
775 binding: 4,
776 resource: cdf_buffer.as_entire_binding(),
777 },
778 ],
779 });
780
781 let workgroup_size = 256u32;
783 let num_workgroups = num_photons.div_ceil(workgroup_size);
784
785 let mut encoder = self
786 .device
787 .create_command_encoder(&wgpu::CommandEncoderDescriptor {
788 label: Some("rt_encoder"),
789 });
790
791 {
792 let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
793 label: Some("rt_trace_pass"),
794 timestamp_writes: None,
795 });
796 pass.set_pipeline(&self.pipeline);
797 pass.set_bind_group(0, &bind_group, &[]);
798 pass.dispatch_workgroups(num_workgroups, 1, 1);
799 }
800
801 encoder.copy_buffer_to_buffer(
803 &detector_buffer,
804 0,
805 &readback_buffer,
806 0,
807 (total_bins as u64) * 4,
808 );
809
810 self.queue.submit(Some(encoder.finish()));
811
812 let buffer_slice = readback_buffer.slice(..);
814 let (tx, rx) = flume::bounded(1);
815 buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
816 tx.send(result).unwrap();
817 });
818 self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
819 rx.recv_async().await.unwrap().unwrap();
820
821 let data = buffer_slice.get_mapped_range();
822 let raw_bins: &[u32] = bytemuck::cast_slice(&data);
823
824 let mut bins = vec![vec![0.0f64; num_g as usize]; num_c as usize];
826 #[allow(clippy::needless_range_loop)]
827 for ci in 0..num_c as usize {
828 for gi in 0..num_g as usize {
829 let idx = ci * num_g as usize + gi;
830 bins[ci][gi] = raw_bins[idx] as f64 / 1_000.0;
831 }
832 }
833
834 drop(data);
835 readback_buffer.unmap();
836
837 GpuDetectorResult {
838 bins,
839 num_c: num_c as usize,
840 num_g: num_g as usize,
841 c_res: c_res_deg as f64,
842 g_res: g_res_deg as f64,
843 }
844 }
845}