Skip to main content

eulumdat_rt/
pipeline.rs

1//! wgpu compute pipeline for photon tracing.
2
3use bytemuck::{Pod, Zeroable};
4use std::borrow::Cow;
5use wgpu::util::DeviceExt;
6
7/// GPU trace configuration — matches TraceConfig in WGSL.
8#[repr(C)]
9#[derive(Clone, Copy, Debug, Pod, Zeroable)]
10pub struct GpuTracerConfig {
11    pub detector_c_bins: u32,
12    pub detector_g_bins: u32,
13    pub detector_c_res: f32,
14    pub detector_g_res: f32,
15    pub seed_offset: u32,
16    pub num_photons: u32,
17    pub source_type: u32,
18    pub source_flux: f32,
19    pub num_primitives: u32,
20    pub max_bounces: u32,
21    pub rr_threshold: f32,
22    pub cdf_g_steps: u32,
23    pub cdf_c_steps: u32,
24    pub cdf_g_max: f32,
25    // Padding to align area_center to 16-byte boundary (WGSL vec3 alignment)
26    pub _align_pad0: u32,
27    pub _align_pad1: u32,
28    // Area source params (source_type=3)
29    pub area_center: [f32; 3],
30    pub _pad0: f32,
31    pub area_normal: [f32; 3],
32    pub _pad1: f32,
33    pub area_u_axis: [f32; 3],
34    pub area_half_width: f32,
35    pub area_half_height: f32,
36    pub _pad2: u32,
37    pub _pad3: u32,
38    pub _pad4: u32,
39}
40
41/// GPU primitive — matches GpuPrimitive in WGSL.
42#[repr(C)]
43#[derive(Clone, Copy, Debug, Pod, Zeroable)]
44pub struct GpuPrimitive {
45    pub ptype: u32,
46    pub material_id: u32,
47    pub _pad0: u32,
48    pub _pad1: u32,
49    pub params: [f32; 12],
50}
51
52impl GpuPrimitive {
53    /// Create a sheet primitive (matches CPU Primitive::Sheet).
54    pub fn sheet(
55        center: [f32; 3],
56        normal: [f32; 3],
57        u_axis: [f32; 3],
58        half_width: f32,
59        half_height: f32,
60        thickness: f32,
61        material_id: u32,
62    ) -> Self {
63        Self {
64            ptype: 0, // PRIM_SHEET
65            material_id,
66            _pad0: 0,
67            _pad1: 0,
68            params: [
69                center[0],
70                center[1],
71                center[2],
72                normal[0],
73                normal[1],
74                normal[2],
75                u_axis[0],
76                u_axis[1],
77                u_axis[2],
78                half_width,
79                half_height,
80                thickness,
81            ],
82        }
83    }
84}
85
86/// GPU material — matches GpuMaterial in WGSL.
87#[repr(C)]
88#[derive(Clone, Copy, Debug, Pod, Zeroable)]
89pub struct GpuMaterial {
90    pub mtype: u32,
91    pub _pad0: u32,
92    pub _pad1: u32,
93    pub _pad2: u32,
94    pub reflectance: f32,
95    pub ior: f32,
96    pub transmittance: f32,
97    pub min_reflectance: f32,
98    pub absorption_coeff: f32,
99    pub scattering_coeff: f32,
100    pub asymmetry: f32,
101    pub thickness: f32,
102}
103
104impl GpuMaterial {
105    /// Convert from eulumdat-goniosim MaterialParams.
106    pub fn from_material_params(params: &eulumdat_goniosim::MaterialParams) -> Self {
107        use eulumdat_goniosim::Material;
108        let mat = params.to_material();
109        match mat {
110            Material::Absorber => Self {
111                mtype: 0,
112                reflectance: 0.0,
113                ior: 1.0,
114                transmittance: 0.0,
115                min_reflectance: 0.0,
116                absorption_coeff: 0.0,
117                scattering_coeff: 0.0,
118                asymmetry: 0.0,
119                thickness: 0.0,
120                _pad0: 0,
121                _pad1: 0,
122                _pad2: 0,
123            },
124            Material::DiffuseReflector { reflectance } => Self {
125                mtype: 1,
126                reflectance: reflectance as f32,
127                ior: 1.0,
128                transmittance: 0.0,
129                min_reflectance: 0.0,
130                absorption_coeff: 0.0,
131                scattering_coeff: 0.0,
132                asymmetry: 0.0,
133                thickness: 0.0,
134                _pad0: 0,
135                _pad1: 0,
136                _pad2: 0,
137            },
138            Material::SpecularReflector { reflectance } => Self {
139                mtype: 2,
140                reflectance: reflectance as f32,
141                ior: 1.0,
142                transmittance: 0.0,
143                min_reflectance: 0.0,
144                absorption_coeff: 0.0,
145                scattering_coeff: 0.0,
146                asymmetry: 0.0,
147                thickness: 0.0,
148                _pad0: 0,
149                _pad1: 0,
150                _pad2: 0,
151            },
152            Material::ClearTransmitter {
153                ior,
154                transmittance,
155                min_reflectance,
156            } => Self {
157                mtype: 4,
158                reflectance: 0.0,
159                ior: ior as f32,
160                transmittance: transmittance as f32,
161                min_reflectance: min_reflectance as f32,
162                absorption_coeff: 0.0,
163                scattering_coeff: 0.0,
164                asymmetry: 0.0,
165                thickness: 0.0,
166                _pad0: 0,
167                _pad1: 0,
168                _pad2: 0,
169            },
170            Material::DiffuseTransmitter {
171                ior,
172                scattering_coeff,
173                absorption_coeff,
174                asymmetry,
175                thickness,
176                min_reflectance,
177            } => Self {
178                mtype: 5,
179                reflectance: 0.0,
180                ior: ior as f32,
181                transmittance: 0.0,
182                min_reflectance: min_reflectance as f32,
183                absorption_coeff: absorption_coeff as f32,
184                scattering_coeff: scattering_coeff as f32,
185                asymmetry: asymmetry as f32,
186                thickness: thickness as f32,
187                _pad0: 0,
188                _pad1: 0,
189                _pad2: 0,
190            },
191            Material::MixedReflector {
192                reflectance,
193                specular_fraction: _,
194            } => Self {
195                mtype: 3,
196                reflectance: reflectance as f32,
197                ior: 1.0,
198                transmittance: 0.0,
199                min_reflectance: 0.0,
200                absorption_coeff: 0.0,
201                scattering_coeff: 0.0,
202                asymmetry: 0.0,
203                thickness: 0.0,
204                _pad0: 0,
205                _pad1: 0,
206                _pad2: 0,
207            },
208        }
209    }
210}
211
212/// Source type enum (matches WGSL switch).
213#[derive(Clone, Copy, Debug)]
214pub enum SourceType {
215    Isotropic = 0,
216    Lambertian = 1,
217    FromLvk = 2,
218    AreaSource = 3,
219}
220
221/// Result from a GPU trace — detector bins as f64.
222pub struct GpuDetectorResult {
223    bins: Vec<Vec<f64>>,
224    num_c: usize,
225    num_g: usize,
226    c_res: f64,
227    g_res: f64,
228}
229
230impl GpuDetectorResult {
231    /// Total detected energy.
232    pub fn total_energy(&self) -> f64 {
233        self.bins.iter().flat_map(|row| row.iter()).sum()
234    }
235
236    /// Get bins as `[c][g]` array.
237    pub fn bins(&self) -> &Vec<Vec<f64>> {
238        &self.bins
239    }
240
241    /// Number of C-bins.
242    pub fn num_c(&self) -> usize {
243        self.num_c
244    }
245
246    /// Number of gamma-bins.
247    pub fn num_g(&self) -> usize {
248        self.num_g
249    }
250
251    /// Convert to candela (same formula as CPU detector).
252    pub fn to_candela(&self, source_flux_lm: f64) -> Vec<Vec<f64>> {
253        let total = self.total_energy();
254        if total <= 0.0 {
255            return self.bins.clone();
256        }
257
258        let flux_per_energy = source_flux_lm / total;
259        let dc_rad = self.c_res.to_radians();
260
261        let mut candela = vec![vec![0.0; self.num_g]; self.num_c];
262        #[allow(clippy::needless_range_loop)]
263        for ci in 0..self.num_c {
264            for gi in 0..self.num_g {
265                let g_rad = (gi as f64 * self.g_res).to_radians();
266                let g_lo = (g_rad - self.g_res.to_radians() / 2.0).max(0.0);
267                let g_hi = (g_rad + self.g_res.to_radians() / 2.0).min(std::f64::consts::PI);
268                let solid_angle = dc_rad * (g_lo.cos() - g_hi.cos()).abs();
269                if solid_angle > 0.0 {
270                    candela[ci][gi] = self.bins[ci][gi] * flux_per_energy / solid_angle;
271                }
272            }
273        }
274        candela
275    }
276}
277
278/// The GPU photon tracer.
279pub struct GpuTracer {
280    device: wgpu::Device,
281    queue: wgpu::Queue,
282    pipeline: wgpu::ComputePipeline,
283    bind_group_layout: wgpu::BindGroupLayout,
284}
285
286impl GpuTracer {
287    /// Create a new GPU tracer, requesting a wgpu device.
288    pub async fn new() -> Result<Self, String> {
289        let instance = wgpu::Instance::default();
290
291        let adapter = instance
292            .request_adapter(&wgpu::RequestAdapterOptions {
293                power_preference: wgpu::PowerPreference::HighPerformance,
294                compatible_surface: None,
295                force_fallback_adapter: false,
296            })
297            .await
298            .map_err(|e| format!("No GPU adapter found: {e}"))?;
299
300        log::info!("GPU adapter: {:?}", adapter.get_info().name);
301
302        let (device, queue) = adapter
303            .request_device(&wgpu::DeviceDescriptor {
304                label: Some("eulumdat-rt"),
305                required_features: wgpu::Features::empty(),
306                required_limits: wgpu::Limits::default(),
307                ..Default::default()
308            })
309            .await
310            .map_err(|e| format!("Failed to create device: {e}"))?;
311
312        // Load shader
313        let shader_source = include_str!("shaders/trace.wgsl");
314        let shader_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
315            label: Some("trace.wgsl"),
316            source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(shader_source)),
317        });
318
319        // Bind group layout
320        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
321            label: Some("rt_bind_group_layout"),
322            entries: &[
323                // detector_bins: storage buffer (read_write)
324                wgpu::BindGroupLayoutEntry {
325                    binding: 0,
326                    visibility: wgpu::ShaderStages::COMPUTE,
327                    ty: wgpu::BindingType::Buffer {
328                        ty: wgpu::BufferBindingType::Storage { read_only: false },
329                        has_dynamic_offset: false,
330                        min_binding_size: None,
331                    },
332                    count: None,
333                },
334                // config: uniform buffer
335                wgpu::BindGroupLayoutEntry {
336                    binding: 1,
337                    visibility: wgpu::ShaderStages::COMPUTE,
338                    ty: wgpu::BindingType::Buffer {
339                        ty: wgpu::BufferBindingType::Uniform,
340                        has_dynamic_offset: false,
341                        min_binding_size: None,
342                    },
343                    count: None,
344                },
345                // primitives: storage buffer (read)
346                wgpu::BindGroupLayoutEntry {
347                    binding: 2,
348                    visibility: wgpu::ShaderStages::COMPUTE,
349                    ty: wgpu::BindingType::Buffer {
350                        ty: wgpu::BufferBindingType::Storage { read_only: true },
351                        has_dynamic_offset: false,
352                        min_binding_size: None,
353                    },
354                    count: None,
355                },
356                // materials: storage buffer (read)
357                wgpu::BindGroupLayoutEntry {
358                    binding: 3,
359                    visibility: wgpu::ShaderStages::COMPUTE,
360                    ty: wgpu::BindingType::Buffer {
361                        ty: wgpu::BufferBindingType::Storage { read_only: true },
362                        has_dynamic_offset: false,
363                        min_binding_size: None,
364                    },
365                    count: None,
366                },
367                // cdf_data: storage buffer (read) for FromLvk source
368                wgpu::BindGroupLayoutEntry {
369                    binding: 4,
370                    visibility: wgpu::ShaderStages::COMPUTE,
371                    ty: wgpu::BindingType::Buffer {
372                        ty: wgpu::BufferBindingType::Storage { read_only: true },
373                        has_dynamic_offset: false,
374                        min_binding_size: None,
375                    },
376                    count: None,
377                },
378            ],
379        });
380
381        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
382            label: Some("rt_pipeline_layout"),
383            bind_group_layouts: &[Some(&bind_group_layout)],
384            immediate_size: 0,
385        });
386
387        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
388            label: Some("rt_trace_pipeline"),
389            layout: Some(&pipeline_layout),
390            module: &shader_module,
391            entry_point: Some("trace_photons"),
392            compilation_options: Default::default(),
393            cache: None,
394        });
395
396        Ok(Self {
397            device,
398            queue,
399            pipeline,
400            bind_group_layout,
401        })
402    }
403
404    /// Trace photons from an isotropic source in free space.
405    pub async fn trace_isotropic(
406        &self,
407        num_photons: u32,
408        c_res_deg: f32,
409        g_res_deg: f32,
410    ) -> GpuDetectorResult {
411        self.trace(
412            num_photons,
413            c_res_deg,
414            g_res_deg,
415            SourceType::Isotropic,
416            1000.0,
417        )
418        .await
419    }
420
421    /// Trace photons from a Lambertian source in free space.
422    pub async fn trace_lambertian(
423        &self,
424        num_photons: u32,
425        c_res_deg: f32,
426        g_res_deg: f32,
427    ) -> GpuDetectorResult {
428        self.trace(
429            num_photons,
430            c_res_deg,
431            g_res_deg,
432            SourceType::Lambertian,
433            1000.0,
434        )
435        .await
436    }
437
438    /// Trace with scene geometry and materials.
439    #[allow(clippy::too_many_arguments)]
440    pub async fn trace_with_scene(
441        &self,
442        num_photons: u32,
443        c_res_deg: f32,
444        g_res_deg: f32,
445        source_type: SourceType,
446        source_flux: f32,
447        primitives: &[GpuPrimitive],
448        materials: &[GpuMaterial],
449    ) -> GpuDetectorResult {
450        self.trace_inner(
451            num_photons,
452            c_res_deg,
453            g_res_deg,
454            source_type,
455            source_flux,
456            primitives,
457            materials,
458            &[],
459            0,
460            0,
461            0.0,
462            50,
463            0.01,
464        )
465        .await
466    }
467
468    /// Trace from a rectangular diffuse area source in free space.
469    #[allow(clippy::too_many_arguments)]
470    pub async fn trace_area_source(
471        &self,
472        num_photons: u32,
473        c_res_deg: f32,
474        g_res_deg: f32,
475        source_flux: f32,
476        center: [f32; 3],
477        normal: [f32; 3],
478        u_axis: [f32; 3],
479        half_width: f32,
480        half_height: f32,
481    ) -> GpuDetectorResult {
482        let num_c = (360.0 / c_res_deg).round() as u32;
483        let num_g = (180.0 / g_res_deg).round() as u32 + 1;
484
485        let config = GpuTracerConfig {
486            detector_c_bins: num_c,
487            detector_g_bins: num_g,
488            detector_c_res: c_res_deg,
489            detector_g_res: g_res_deg,
490            seed_offset: 42,
491            num_photons,
492            source_type: SourceType::AreaSource as u32,
493            source_flux,
494            num_primitives: 0,
495            max_bounces: 50,
496            rr_threshold: 0.01,
497            cdf_g_steps: 0,
498            cdf_c_steps: 0,
499            cdf_g_max: 0.0,
500            _align_pad0: 0,
501            _align_pad1: 0,
502            area_center: center,
503            _pad0: 0.0,
504            area_normal: normal,
505            _pad1: 0.0,
506            area_u_axis: u_axis,
507            area_half_width: half_width,
508            area_half_height: half_height,
509            _pad2: 0,
510            _pad3: 0,
511            _pad4: 0,
512        };
513
514        self.dispatch_config(config, num_c, num_g, &[], &[], &[])
515            .await
516    }
517
518    /// Trace from an LDT source (FromLvk) with optional cover geometry.
519    #[allow(clippy::too_many_arguments)]
520    pub async fn trace_from_lvk(
521        &self,
522        num_photons: u32,
523        c_res_deg: f32,
524        g_res_deg: f32,
525        source_flux: f32,
526        cdf: &eulumdat_goniosim::source::LvkCdf,
527        primitives: &[GpuPrimitive],
528        materials: &[GpuMaterial],
529    ) -> GpuDetectorResult {
530        // Flatten CDF data: marginal_g (g_steps) + conditional_c (g_steps * c_steps)
531        let g_steps = cdf.g_steps;
532        let c_steps = cdf.c_steps;
533        let mut cdf_flat = Vec::with_capacity(g_steps + g_steps * c_steps);
534        // Marginal CDF
535        for v in &cdf.marginal_g {
536            cdf_flat.push(*v as f32);
537        }
538        // Conditional CDFs (flattened)
539        for row in &cdf.conditional_c {
540            for v in row {
541                cdf_flat.push(*v as f32);
542            }
543        }
544
545        self.trace_inner(
546            num_photons,
547            c_res_deg,
548            g_res_deg,
549            SourceType::FromLvk,
550            source_flux,
551            primitives,
552            materials,
553            &cdf_flat,
554            g_steps as u32,
555            c_steps as u32,
556            cdf.g_max as f32,
557            50,
558            0.01,
559        )
560        .await
561    }
562
563    /// Core trace dispatch.
564    async fn trace(
565        &self,
566        num_photons: u32,
567        c_res_deg: f32,
568        g_res_deg: f32,
569        source_type: SourceType,
570        source_flux: f32,
571    ) -> GpuDetectorResult {
572        self.trace_inner(
573            num_photons,
574            c_res_deg,
575            g_res_deg,
576            source_type,
577            source_flux,
578            &[],
579            &[],
580            &[],
581            0,
582            0,
583            0.0,
584            1,
585            0.01,
586        )
587        .await
588    }
589
590    #[allow(clippy::too_many_arguments)]
591    async fn trace_inner(
592        &self,
593        num_photons: u32,
594        c_res_deg: f32,
595        g_res_deg: f32,
596        source_type: SourceType,
597        source_flux: f32,
598        primitives_data: &[GpuPrimitive],
599        materials_data: &[GpuMaterial],
600        cdf_data: &[f32],
601        cdf_g_steps: u32,
602        cdf_c_steps: u32,
603        cdf_g_max: f32,
604        max_bounces: u32,
605        rr_threshold: f32,
606    ) -> GpuDetectorResult {
607        let num_c = (360.0 / c_res_deg).round() as u32;
608        let num_g = (180.0 / g_res_deg).round() as u32 + 1;
609
610        let config = GpuTracerConfig {
611            detector_c_bins: num_c,
612            detector_g_bins: num_g,
613            detector_c_res: c_res_deg,
614            detector_g_res: g_res_deg,
615            seed_offset: 42,
616            num_photons,
617            source_type: source_type as u32,
618            source_flux,
619            num_primitives: primitives_data.len() as u32,
620            max_bounces,
621            rr_threshold,
622            cdf_g_steps,
623            cdf_c_steps,
624            cdf_g_max,
625            _align_pad0: 0,
626            _align_pad1: 0,
627            area_center: [0.0; 3],
628            _pad0: 0.0,
629            area_normal: [0.0, 0.0, -1.0],
630            _pad1: 0.0,
631            area_u_axis: [1.0, 0.0, 0.0],
632            area_half_width: 0.0,
633            area_half_height: 0.0,
634            _pad2: 0,
635            _pad3: 0,
636            _pad4: 0,
637        };
638
639        self.dispatch_config(
640            config,
641            num_c,
642            num_g,
643            primitives_data,
644            materials_data,
645            cdf_data,
646        )
647        .await
648    }
649
650    /// Core dispatch: creates GPU buffers, runs compute, reads back results.
651    async fn dispatch_config(
652        &self,
653        config: GpuTracerConfig,
654        num_c: u32,
655        num_g: u32,
656        primitives_data: &[GpuPrimitive],
657        materials_data: &[GpuMaterial],
658        cdf_data: &[f32],
659    ) -> GpuDetectorResult {
660        let total_bins = num_c * num_g;
661        let num_photons = config.num_photons;
662        let c_res_deg = config.detector_c_res;
663        let g_res_deg = config.detector_g_res;
664
665        let config_buffer = self
666            .device
667            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
668                label: Some("config_buffer"),
669                contents: bytemuck::bytes_of(&config),
670                usage: wgpu::BufferUsages::UNIFORM,
671            });
672
673        // Detector buffer (zeros)
674        let detector_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
675            label: Some("detector_buffer"),
676            size: (total_bins as u64) * 4, // u32 per bin
677            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
678            mapped_at_creation: false,
679        });
680
681        // Readback buffer
682        let readback_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
683            label: Some("readback_buffer"),
684            size: (total_bins as u64) * 4,
685            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
686            mapped_at_creation: false,
687        });
688
689        // Primitive + material buffers (need at least 1 element for wgpu)
690        let dummy_prim = GpuPrimitive {
691            ptype: 0,
692            material_id: 0,
693            _pad0: 0,
694            _pad1: 0,
695            params: [0.0; 12],
696        };
697        let dummy_mat = GpuMaterial {
698            mtype: 0,
699            _pad0: 0,
700            _pad1: 0,
701            _pad2: 0,
702            reflectance: 0.0,
703            ior: 1.0,
704            transmittance: 0.0,
705            min_reflectance: 0.0,
706            absorption_coeff: 0.0,
707            scattering_coeff: 0.0,
708            asymmetry: 0.0,
709            thickness: 0.0,
710        };
711
712        let prim_buf_data: Vec<GpuPrimitive> = if primitives_data.is_empty() {
713            vec![dummy_prim]
714        } else {
715            primitives_data.to_vec()
716        };
717        let mat_buf_data: Vec<GpuMaterial> = if materials_data.is_empty() {
718            vec![dummy_mat]
719        } else {
720            materials_data.to_vec()
721        };
722
723        let primitives_buffer = self
724            .device
725            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
726                label: Some("primitives_buffer"),
727                contents: bytemuck::cast_slice(&prim_buf_data),
728                usage: wgpu::BufferUsages::STORAGE,
729            });
730
731        let materials_buffer = self
732            .device
733            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
734                label: Some("materials_buffer"),
735                contents: bytemuck::cast_slice(&mat_buf_data),
736                usage: wgpu::BufferUsages::STORAGE,
737            });
738
739        // CDF buffer
740        let cdf_buf_data: Vec<f32> = if cdf_data.is_empty() {
741            vec![0.0]
742        } else {
743            cdf_data.to_vec()
744        };
745        let cdf_buffer = self
746            .device
747            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
748                label: Some("cdf_buffer"),
749                contents: bytemuck::cast_slice(&cdf_buf_data),
750                usage: wgpu::BufferUsages::STORAGE,
751            });
752
753        // Bind group
754        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
755            label: Some("rt_bind_group"),
756            layout: &self.bind_group_layout,
757            entries: &[
758                wgpu::BindGroupEntry {
759                    binding: 0,
760                    resource: detector_buffer.as_entire_binding(),
761                },
762                wgpu::BindGroupEntry {
763                    binding: 1,
764                    resource: config_buffer.as_entire_binding(),
765                },
766                wgpu::BindGroupEntry {
767                    binding: 2,
768                    resource: primitives_buffer.as_entire_binding(),
769                },
770                wgpu::BindGroupEntry {
771                    binding: 3,
772                    resource: materials_buffer.as_entire_binding(),
773                },
774                wgpu::BindGroupEntry {
775                    binding: 4,
776                    resource: cdf_buffer.as_entire_binding(),
777                },
778            ],
779        });
780
781        // Dispatch compute
782        let workgroup_size = 256u32;
783        let num_workgroups = num_photons.div_ceil(workgroup_size);
784
785        let mut encoder = self
786            .device
787            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
788                label: Some("rt_encoder"),
789            });
790
791        {
792            let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
793                label: Some("rt_trace_pass"),
794                timestamp_writes: None,
795            });
796            pass.set_pipeline(&self.pipeline);
797            pass.set_bind_group(0, &bind_group, &[]);
798            pass.dispatch_workgroups(num_workgroups, 1, 1);
799        }
800
801        // Copy detector to readback
802        encoder.copy_buffer_to_buffer(
803            &detector_buffer,
804            0,
805            &readback_buffer,
806            0,
807            (total_bins as u64) * 4,
808        );
809
810        self.queue.submit(Some(encoder.finish()));
811
812        // Read back results
813        let buffer_slice = readback_buffer.slice(..);
814        let (tx, rx) = flume::bounded(1);
815        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
816            tx.send(result).unwrap();
817        });
818        self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
819        rx.recv_async().await.unwrap().unwrap();
820
821        let data = buffer_slice.get_mapped_range();
822        let raw_bins: &[u32] = bytemuck::cast_slice(&data);
823
824        // Convert fixed-point u32 back to f64
825        let mut bins = vec![vec![0.0f64; num_g as usize]; num_c as usize];
826        #[allow(clippy::needless_range_loop)]
827        for ci in 0..num_c as usize {
828            for gi in 0..num_g as usize {
829                let idx = ci * num_g as usize + gi;
830                bins[ci][gi] = raw_bins[idx] as f64 / 1_000.0;
831            }
832        }
833
834        drop(data);
835        readback_buffer.unmap();
836
837        GpuDetectorResult {
838            bins,
839            num_c: num_c as usize,
840            num_g: num_g as usize,
841            c_res: c_res_deg as f64,
842            g_res: g_res_deg as f64,
843        }
844    }
845}