threecrate_gpu/
filtering.rs

1//! GPU-accelerated filtering
2
3use threecrate_core::{PointCloud, Result, Point3f};
4use crate::GpuContext;
5
6const STATISTICAL_OUTLIER_SHADER: &str = r#"
7struct GpuPoint {
8    position: vec3<f32>,
9    _padding: f32,  // Ensure 16-byte alignment
10}
11
12@group(0) @binding(0) var<storage, read> input_points: array<GpuPoint>;
13@group(0) @binding(1) var<storage, read> neighbors: array<array<u32, MAX_NEIGHBORS>>;
14@group(0) @binding(2) var<storage, read_write> is_outlier: array<u32>;
15@group(0) @binding(3) var<uniform> params: FilterParams;
16
17struct FilterParams {
18    num_points: u32,
19    k_neighbors: u32,
20    std_dev_threshold: f32,
21    mean_distance: f32,
22}
23
24@compute @workgroup_size(64)
25fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
26    let index = global_id.x;
27    if (index >= params.num_points) {
28        return;
29    }
30    
31    let center_point = input_points[index].position;
32    
33    // Compute mean distance to k-nearest neighbors
34    var total_distance = 0.0;
35    var count = 0u;
36    
37    for (var i = 0u; i < params.k_neighbors; i++) {
38        let neighbor_idx = neighbors[index][i];
39        if (neighbor_idx < params.num_points) {
40            let neighbor_point = input_points[neighbor_idx].position;
41            let distance = length(center_point - neighbor_point);
42            total_distance += distance;
43            count++;
44        }
45    }
46    
47    if (count == 0u) {
48        is_outlier[index] = 0u;
49        return;
50    }
51    
52    let mean_distance = total_distance / f32(count);
53    
54    // Mark as outlier if distance is beyond threshold
55    let deviation = abs(mean_distance - params.mean_distance);
56    is_outlier[index] = select(0u, 1u, deviation > params.std_dev_threshold);
57}
58"#;
59
60const RADIUS_OUTLIER_SHADER: &str = r#"
61struct GpuPoint {
62    position: vec3<f32>,
63    _padding: f32,  // Ensure 16-byte alignment
64}
65
66@group(0) @binding(0) var<storage, read> input_points: array<GpuPoint>;
67@group(0) @binding(1) var<storage, read_write> is_outlier: array<u32>;
68@group(0) @binding(2) var<uniform> params: RadiusOutlierParams;
69
70struct RadiusOutlierParams {
71    num_points: u32,
72    radius: f32,
73    min_neighbors: u32,
74    _padding: u32,
75}
76
77@compute @workgroup_size(64)
78fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
79    let index = global_id.x;
80    if (index >= params.num_points) {
81        return;
82    }
83    
84    let center_point = input_points[index].position;
85    var neighbor_count = 0u;
86    
87    // Count neighbors within radius
88    for (var i = 0u; i < params.num_points; i++) {
89        if (i != index) {
90            let neighbor_point = input_points[i].position;
91            let distance = length(center_point - neighbor_point);
92            if (distance <= params.radius) {
93                neighbor_count++;
94            }
95        }
96    }
97    
98    // Mark as outlier if neighbor count is below threshold
99    is_outlier[index] = select(0u, 1u, neighbor_count < params.min_neighbors);
100}
101"#;
102
103const VOXEL_GRID_SHADER: &str = r#"
104struct GpuPoint {
105    position: vec3<f32>,
106    _padding: f32,  // Ensure 16-byte alignment
107}
108
109@group(0) @binding(0) var<storage, read> input_points: array<GpuPoint>;
110@group(0) @binding(1) var<storage, read_write> voxel_indices: array<u32>;
111@group(0) @binding(2) var<uniform> params: VoxelGridParams;
112
113struct VoxelGridParams {
114    num_points: u32,
115    voxel_size: f32,
116    min_x: f32,
117    min_y: f32,
118    min_z: f32,
119    max_x: f32,
120    max_y: f32,
121    max_z: f32,
122}
123
124fn get_voxel_index(point: vec3<f32>, params: VoxelGridParams) -> u32 {
125    // Simple coordinate-based voxel index (similar to CPU version)
126    let voxel_x = i32(floor((point.x - params.min_x) / params.voxel_size));
127    let voxel_y = i32(floor((point.y - params.min_y) / params.voxel_size));
128    let voxel_z = i32(floor((point.z - params.min_z) / params.voxel_size));
129    
130    // Use a simple hash based on coordinates
131    let hash_x = u32(voxel_x * voxel_x + voxel_x);
132    let hash_y = u32(voxel_y * voxel_y + voxel_y * 7);
133    let hash_z = u32(voxel_z * voxel_z + voxel_z * 13);
134    
135    return (hash_x + hash_y * 31u + hash_z * 97u) % 100000u;
136}
137
138@compute @workgroup_size(64)
139fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
140    let index = global_id.x;
141    if (index >= params.num_points) {
142        return;
143    }
144    
145    let point = input_points[index].position;
146    let voxel_idx = get_voxel_index(point, params);
147    
148    // Store voxel index for this point
149    voxel_indices[index] = voxel_idx;
150}
151"#;
152
153impl GpuContext {
154    /// Remove statistical outliers from point cloud using GPU acceleration
155    pub async fn remove_statistical_outliers(
156        &self,
157        points: &[Point3f],
158        k_neighbors: usize,
159        std_dev_multiplier: f32,
160    ) -> Result<Vec<Point3f>> {
161        if points.is_empty() {
162            return Ok(Vec::new());
163        }
164
165        // Convert points to GPU format with proper alignment
166        #[repr(C)]
167        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
168        struct GpuPoint {
169            position: [f32; 3],
170            _padding: f32,
171        }
172        
173        let point_data: Vec<GpuPoint> = points
174            .iter()
175            .map(|p| GpuPoint {
176                position: [p.x, p.y, p.z],
177                _padding: 0.0,
178            })
179            .collect();
180
181        // Convert to the format expected by helper functions
182        let point_data_flat: Vec<[f32; 3]> = point_data.iter().map(|p| p.position).collect();
183        
184        // Compute neighbors (reuse from normals computation)
185        let neighbors = self.compute_neighbors_simple(&point_data_flat, k_neighbors);
186        
187        // Compute global statistics on CPU first (could be moved to GPU)
188        let global_mean = self.compute_global_mean_distance(&point_data_flat, &neighbors, k_neighbors);
189
190        // Create buffers
191        let input_buffer = self.create_buffer_init(
192            "Input Points",
193            &point_data,
194            wgpu::BufferUsages::STORAGE,
195        );
196
197        let neighbors_buffer = self.create_buffer_init(
198            "Neighbors",
199            &neighbors,
200            wgpu::BufferUsages::STORAGE,
201        );
202
203        let outlier_buffer = self.create_buffer(
204            "Outlier Flags",
205            (point_data.len() * std::mem::size_of::<u32>()) as u64,
206            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
207        );
208
209        #[repr(C)]
210        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
211        struct FilterParams {
212            num_points: u32,
213            k_neighbors: u32,
214            std_dev_threshold: f32,
215            mean_distance: f32,
216        }
217
218        let params = FilterParams {
219            num_points: points.len() as u32,
220            k_neighbors: k_neighbors as u32,
221            std_dev_threshold: global_mean * std_dev_multiplier,
222            mean_distance: global_mean,
223        };
224
225        let params_buffer = self.create_buffer_init(
226            "Filter Params",
227            &[params],
228            wgpu::BufferUsages::UNIFORM,
229        );
230
231        // Create shader with MAX_NEIGHBORS constant
232        let shader_source = STATISTICAL_OUTLIER_SHADER.replace("MAX_NEIGHBORS", &k_neighbors.to_string());
233        let shader = self.create_shader_module("Statistical Outlier Filter", &shader_source);
234
235        // Create bind group layout
236        let bind_group_layout = self.create_bind_group_layout(
237            "Outlier Filter",
238            &[
239                wgpu::BindGroupLayoutEntry {
240                    binding: 0,
241                    visibility: wgpu::ShaderStages::COMPUTE,
242                    ty: wgpu::BindingType::Buffer {
243                        ty: wgpu::BufferBindingType::Storage { read_only: true },
244                        has_dynamic_offset: false,
245                        min_binding_size: None,
246                    },
247                    count: None,
248                },
249                wgpu::BindGroupLayoutEntry {
250                    binding: 1,
251                    visibility: wgpu::ShaderStages::COMPUTE,
252                    ty: wgpu::BindingType::Buffer {
253                        ty: wgpu::BufferBindingType::Storage { read_only: true },
254                        has_dynamic_offset: false,
255                        min_binding_size: None,
256                    },
257                    count: None,
258                },
259                wgpu::BindGroupLayoutEntry {
260                    binding: 2,
261                    visibility: wgpu::ShaderStages::COMPUTE,
262                    ty: wgpu::BindingType::Buffer {
263                        ty: wgpu::BufferBindingType::Storage { read_only: false },
264                        has_dynamic_offset: false,
265                        min_binding_size: None,
266                    },
267                    count: None,
268                },
269                wgpu::BindGroupLayoutEntry {
270                    binding: 3,
271                    visibility: wgpu::ShaderStages::COMPUTE,
272                    ty: wgpu::BindingType::Buffer {
273                        ty: wgpu::BufferBindingType::Uniform,
274                        has_dynamic_offset: false,
275                        min_binding_size: None,
276                    },
277                    count: None,
278                },
279            ],
280        );
281
282        // Create compute pipeline
283        let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
284            label: Some("Outlier Filter Pipeline"),
285            layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
286                label: Some("Outlier Filter Layout"),
287                bind_group_layouts: &[&bind_group_layout],
288                push_constant_ranges: &[],
289            })),
290            module: &shader,
291            entry_point: Some("main"),
292            compilation_options: wgpu::PipelineCompilationOptions::default(),
293            cache: None,
294        });
295
296        // Create bind group
297        let bind_group = self.create_bind_group(
298            "Outlier Filter",
299            &bind_group_layout,
300            &[
301                wgpu::BindGroupEntry {
302                    binding: 0,
303                    resource: input_buffer.as_entire_binding(),
304                },
305                wgpu::BindGroupEntry {
306                    binding: 1,
307                    resource: neighbors_buffer.as_entire_binding(),
308                },
309                wgpu::BindGroupEntry {
310                    binding: 2,
311                    resource: outlier_buffer.as_entire_binding(),
312                },
313                wgpu::BindGroupEntry {
314                    binding: 3,
315                    resource: params_buffer.as_entire_binding(),
316                },
317            ],
318        );
319
320        // Execute compute shader
321        let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
322            label: Some("Outlier Filter"),
323        });
324
325        {
326            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
327                label: Some("Outlier Filter Pass"),
328                timestamp_writes: None,
329            });
330            compute_pass.set_pipeline(&pipeline);
331            compute_pass.set_bind_group(0, &bind_group, &[]);
332            let workgroup_count = (points.len() + 63) / 64;
333            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
334        }
335
336        // Read back results
337        let staging_buffer = self.create_buffer(
338            "Outlier Staging",
339            (point_data.len() * std::mem::size_of::<u32>()) as u64,
340            wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
341        );
342
343        encoder.copy_buffer_to_buffer(
344            &outlier_buffer,
345            0,
346            &staging_buffer,
347            0,
348            (point_data.len() * std::mem::size_of::<u32>()) as u64,
349        );
350
351        self.queue.submit(std::iter::once(encoder.finish()));
352
353        // Map and read results
354        let buffer_slice = staging_buffer.slice(..);
355        let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
356        buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
357
358        let _ = self.device.poll(wgpu::PollType::Wait);
359
360        if let Some(Ok(())) = receiver.receive().await {
361            let data = buffer_slice.get_mapped_range();
362            let outlier_flags: Vec<u32> = bytemuck::cast_slice(&data).to_vec();
363            
364            let filtered_points: Vec<Point3f> = points
365                .iter()
366                .zip(outlier_flags.iter())
367                .filter(|&(_, &is_outlier)| is_outlier == 0)
368                .map(|(point, _)| *point)
369                .collect();
370            
371            drop(data);
372            staging_buffer.unmap();
373            
374            Ok(filtered_points)
375        } else {
376            Err(threecrate_core::Error::Gpu("Failed to read GPU filtering results".to_string()))
377        }
378    }
379
380    /// Compute global mean distance for statistical filtering
381    fn compute_global_mean_distance(&self, points: &[[f32; 3]], neighbors: &[[u32; 64]], k: usize) -> f32 {
382        let k = k.min(64).min(points.len());
383        let mut total_distance = 0.0;
384        let mut count = 0;
385
386        for (i, point) in points.iter().enumerate() {
387            for j in 0..k {
388                let neighbor_idx = neighbors[i][j] as usize;
389                if neighbor_idx < points.len() {
390                    let neighbor_point = &points[neighbor_idx];
391                    let dx = point[0] - neighbor_point[0];
392                    let dy = point[1] - neighbor_point[1];
393                    let dz = point[2] - neighbor_point[2];
394                    let distance = (dx * dx + dy * dy + dz * dz).sqrt();
395                    total_distance += distance;
396                    count += 1;
397                }
398            }
399        }
400
401        if count > 0 {
402            total_distance / count as f32
403        } else {
404            0.0
405        }
406    }
407
408    /// Remove radius outliers from point cloud using GPU acceleration
409    pub async fn remove_radius_outliers(
410        &self,
411        points: &[Point3f],
412        radius: f32,
413        min_neighbors: usize,
414    ) -> Result<Vec<Point3f>> {
415        if points.is_empty() {
416            return Ok(Vec::new());
417        }
418
419        // Convert points to GPU format with proper alignment
420        #[repr(C)]
421        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
422        struct GpuPoint {
423            position: [f32; 3],
424            _padding: f32,
425        }
426        
427        let point_data: Vec<GpuPoint> = points
428            .iter()
429            .map(|p| GpuPoint {
430                position: [p.x, p.y, p.z],
431                _padding: 0.0,
432            })
433            .collect();
434
435        // Create buffers
436        let input_buffer = self.create_buffer_init(
437            "Input Points",
438            &point_data,
439            wgpu::BufferUsages::STORAGE,
440        );
441
442        let outlier_buffer = self.create_buffer(
443            "Outlier Flags",
444            (point_data.len() * std::mem::size_of::<u32>()) as u64,
445            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
446        );
447
448        #[repr(C)]
449        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
450        struct RadiusOutlierParams {
451            num_points: u32,
452            radius: f32,
453            min_neighbors: u32,
454            _padding: u32,
455        }
456
457        let params = RadiusOutlierParams {
458            num_points: points.len() as u32,
459            radius,
460            min_neighbors: min_neighbors as u32,
461            _padding: 0,
462        };
463
464        let params_buffer = self.create_buffer_init(
465            "Radius Outlier Params",
466            &[params],
467            wgpu::BufferUsages::UNIFORM,
468        );
469
470        // Create shader
471        let shader = self.create_shader_module("Radius Outlier Filter", RADIUS_OUTLIER_SHADER);
472
473        // Create bind group layout
474        let bind_group_layout = self.create_bind_group_layout(
475            "Radius Outlier Filter",
476            &[
477                wgpu::BindGroupLayoutEntry {
478                    binding: 0,
479                    visibility: wgpu::ShaderStages::COMPUTE,
480                    ty: wgpu::BindingType::Buffer {
481                        ty: wgpu::BufferBindingType::Storage { read_only: true },
482                        has_dynamic_offset: false,
483                        min_binding_size: None,
484                    },
485                    count: None,
486                },
487                wgpu::BindGroupLayoutEntry {
488                    binding: 1,
489                    visibility: wgpu::ShaderStages::COMPUTE,
490                    ty: wgpu::BindingType::Buffer {
491                        ty: wgpu::BufferBindingType::Storage { read_only: false },
492                        has_dynamic_offset: false,
493                        min_binding_size: None,
494                    },
495                    count: None,
496                },
497                wgpu::BindGroupLayoutEntry {
498                    binding: 2,
499                    visibility: wgpu::ShaderStages::COMPUTE,
500                    ty: wgpu::BindingType::Buffer {
501                        ty: wgpu::BufferBindingType::Uniform,
502                        has_dynamic_offset: false,
503                        min_binding_size: None,
504                    },
505                    count: None,
506                },
507            ],
508        );
509
510        // Create compute pipeline
511        let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
512            label: Some("Radius Outlier Filter Pipeline"),
513            layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
514                label: Some("Radius Outlier Filter Layout"),
515                bind_group_layouts: &[&bind_group_layout],
516                push_constant_ranges: &[],
517            })),
518            module: &shader,
519            entry_point: Some("main"),
520            compilation_options: wgpu::PipelineCompilationOptions::default(),
521            cache: None,
522        });
523
524        // Create bind group
525        let bind_group = self.create_bind_group(
526            "Radius Outlier Filter",
527            &bind_group_layout,
528            &[
529                wgpu::BindGroupEntry {
530                    binding: 0,
531                    resource: input_buffer.as_entire_binding(),
532                },
533                wgpu::BindGroupEntry {
534                    binding: 1,
535                    resource: outlier_buffer.as_entire_binding(),
536                },
537                wgpu::BindGroupEntry {
538                    binding: 2,
539                    resource: params_buffer.as_entire_binding(),
540                },
541            ],
542        );
543
544        // Execute compute shader
545        let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
546            label: Some("Radius Outlier Filter"),
547        });
548
549        {
550            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
551                label: Some("Radius Outlier Filter Pass"),
552                timestamp_writes: None,
553            });
554            compute_pass.set_pipeline(&pipeline);
555            compute_pass.set_bind_group(0, &bind_group, &[]);
556            let workgroup_count = (points.len() + 63) / 64;
557            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
558        }
559
560        // Read back results
561        let staging_buffer = self.create_buffer(
562            "Radius Outlier Staging",
563            (point_data.len() * std::mem::size_of::<u32>()) as u64,
564            wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
565        );
566
567        encoder.copy_buffer_to_buffer(
568            &outlier_buffer,
569            0,
570            &staging_buffer,
571            0,
572            (point_data.len() * std::mem::size_of::<u32>()) as u64,
573        );
574
575        self.queue.submit(std::iter::once(encoder.finish()));
576
577        // Map and read results
578        let buffer_slice = staging_buffer.slice(..);
579        let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
580        buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
581
582        let _ = self.device.poll(wgpu::PollType::Wait);
583
584        if let Some(Ok(())) = receiver.receive().await {
585            let data = buffer_slice.get_mapped_range();
586            let outlier_flags: Vec<u32> = bytemuck::cast_slice(&data).to_vec();
587            
588            let filtered_points: Vec<Point3f> = points
589                .iter()
590                .zip(outlier_flags.iter())
591                .filter(|&(_, &is_outlier)| is_outlier == 0)
592                .map(|(point, _)| *point)
593                .collect();
594            
595            drop(data);
596            staging_buffer.unmap();
597            
598            Ok(filtered_points)
599        } else {
600            Err(threecrate_core::Error::Gpu("Failed to read GPU radius outlier filtering results".to_string()))
601        }
602    }
603
604    /// Voxel grid filtering using GPU acceleration
605    pub async fn voxel_grid_filter(
606        &self,
607        points: &[Point3f],
608        voxel_size: f32,
609    ) -> Result<Vec<Point3f>> {
610        if points.is_empty() {
611            return Ok(Vec::new());
612        }
613
614        if voxel_size <= 0.0 {
615            return Err(threecrate_core::Error::InvalidData("Voxel size must be positive".to_string()));
616        }
617
618        // Convert points to GPU format
619        #[repr(C)]
620        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
621        struct GpuPoint {
622            position: [f32; 3],
623            _padding: f32,
624        }
625        
626        let point_data: Vec<GpuPoint> = points
627            .iter()
628            .map(|p| GpuPoint {
629                position: [p.x, p.y, p.z],
630                _padding: 0.0,
631            })
632            .collect();
633
634        // Compute grid bounds
635        let min_x = points.iter().map(|p| p.x).min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
636        let min_y = points.iter().map(|p| p.y).min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
637        let min_z = points.iter().map(|p| p.z).min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
638        let max_x = points.iter().map(|p| p.x).max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
639        let max_y = points.iter().map(|p| p.y).max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
640        let max_z = points.iter().map(|p| p.z).max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
641
642        // Estimate total voxels for buffer allocation (we'll use a hash-based approach)
643        let total_voxels = points.len() * 2; // Conservative estimate
644
645        // Create buffers
646        let input_buffer = self.create_buffer_init(
647            "Input Points",
648            &point_data,
649            wgpu::BufferUsages::STORAGE,
650        );
651
652        let voxel_indices_buffer = self.create_buffer(
653            "Voxel Indices",
654            (point_data.len() * std::mem::size_of::<u32>()) as u64,
655            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
656        );
657
658
659
660        #[repr(C)]
661        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
662        struct VoxelGridParams {
663            num_points: u32,
664            voxel_size: f32,
665            min_x: f32,
666            min_y: f32,
667            min_z: f32,
668            max_x: f32,
669            max_y: f32,
670            max_z: f32,
671        }
672
673        let params = VoxelGridParams {
674            num_points: points.len() as u32,
675            voxel_size,
676            min_x,
677            min_y,
678            min_z,
679            max_x,
680            max_y,
681            max_z,
682        };
683
684        let params_buffer = self.create_buffer_init(
685            "Voxel Grid Params",
686            &[params],
687            wgpu::BufferUsages::UNIFORM,
688        );
689
690        // Create shader
691        let shader = self.create_shader_module("Voxel Grid Filter", VOXEL_GRID_SHADER);
692
693        // Create bind group layout
694        let bind_group_layout = self.create_bind_group_layout(
695            "Voxel Grid Filter",
696            &[
697                wgpu::BindGroupLayoutEntry {
698                    binding: 0,
699                    visibility: wgpu::ShaderStages::COMPUTE,
700                    ty: wgpu::BindingType::Buffer {
701                        ty: wgpu::BufferBindingType::Storage { read_only: true },
702                        has_dynamic_offset: false,
703                        min_binding_size: None,
704                    },
705                    count: None,
706                },
707                wgpu::BindGroupLayoutEntry {
708                    binding: 1,
709                    visibility: wgpu::ShaderStages::COMPUTE,
710                    ty: wgpu::BindingType::Buffer {
711                        ty: wgpu::BufferBindingType::Storage { read_only: false },
712                        has_dynamic_offset: false,
713                        min_binding_size: None,
714                    },
715                    count: None,
716                },
717                wgpu::BindGroupLayoutEntry {
718                    binding: 2,
719                    visibility: wgpu::ShaderStages::COMPUTE,
720                    ty: wgpu::BindingType::Buffer {
721                        ty: wgpu::BufferBindingType::Uniform,
722                        has_dynamic_offset: false,
723                        min_binding_size: None,
724                    },
725                    count: None,
726                },
727            ],
728        );
729
730        // Create compute pipeline
731        let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
732            label: Some("Voxel Grid Filter Pipeline"),
733            layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
734                label: Some("Voxel Grid Filter Layout"),
735                bind_group_layouts: &[&bind_group_layout],
736                push_constant_ranges: &[],
737            })),
738            module: &shader,
739            entry_point: Some("main"),
740            compilation_options: wgpu::PipelineCompilationOptions::default(),
741            cache: None,
742        });
743
744        // Create bind group
745        let bind_group = self.create_bind_group(
746            "Voxel Grid Filter",
747            &bind_group_layout,
748            &[
749                wgpu::BindGroupEntry {
750                    binding: 0,
751                    resource: input_buffer.as_entire_binding(),
752                },
753                wgpu::BindGroupEntry {
754                    binding: 1,
755                    resource: voxel_indices_buffer.as_entire_binding(),
756                },
757                wgpu::BindGroupEntry {
758                    binding: 2,
759                    resource: params_buffer.as_entire_binding(),
760                },
761            ],
762        );
763
764        // Execute compute shader
765        let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
766            label: Some("Voxel Grid Filter"),
767        });
768
769        {
770            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
771                label: Some("Voxel Grid Filter Pass"),
772                timestamp_writes: None,
773            });
774            compute_pass.set_pipeline(&pipeline);
775            compute_pass.set_bind_group(0, &bind_group, &[]);
776            let workgroup_count = (points.len() + 63) / 64;
777            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
778        }
779
780        // Read back voxel indices
781        let indices_staging = self.create_buffer(
782            "Voxel Indices Staging",
783            (point_data.len() * std::mem::size_of::<u32>()) as u64,
784            wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
785        );
786
787        encoder.copy_buffer_to_buffer(
788            &voxel_indices_buffer,
789            0,
790            &indices_staging,
791            0,
792            (point_data.len() * std::mem::size_of::<u32>()) as u64,
793        );
794
795        self.queue.submit(std::iter::once(encoder.finish()));
796
797        // Map and read results
798        let indices_slice = indices_staging.slice(..);
799        
800        let (indices_sender, indices_receiver) = futures_intrusive::channel::shared::oneshot_channel();
801        
802        indices_slice.map_async(wgpu::MapMode::Read, move |v| indices_sender.send(v).unwrap());
803
804        let _ = self.device.poll(wgpu::PollType::Wait);
805
806        if let Some(Ok(())) = indices_receiver.receive().await {
807            let indices_data = indices_slice.get_mapped_range();
808            
809            let voxel_indices: Vec<u32> = bytemuck::cast_slice(&indices_data).to_vec();
810            
811            // Post-process on CPU: keep one point per voxel (the first one)
812            let mut voxel_used = vec![false; total_voxels];
813            let mut filtered_points = Vec::new();
814            
815            for (point_idx, &voxel_idx) in voxel_indices.iter().enumerate() {
816                if voxel_idx < total_voxels as u32 && !voxel_used[voxel_idx as usize] {
817                    filtered_points.push(points[point_idx]);
818                    voxel_used[voxel_idx as usize] = true;
819                }
820            }
821            
822            drop(indices_data);
823            indices_staging.unmap();
824            
825            Ok(filtered_points)
826        } else {
827            Err(threecrate_core::Error::Gpu("Failed to read GPU voxel grid filtering results".to_string()))
828        }
829    }
830}
831
832/// GPU-accelerated statistical outlier removal
833pub async fn gpu_remove_statistical_outliers(
834    gpu_context: &GpuContext,
835    cloud: &PointCloud<Point3f>,
836    k_neighbors: usize,
837    std_dev_multiplier: f32,
838) -> Result<PointCloud<Point3f>> {
839    let filtered_points = gpu_context.remove_statistical_outliers(&cloud.points, k_neighbors, std_dev_multiplier).await?;
840    Ok(PointCloud::from_points(filtered_points))
841}
842
843/// GPU-accelerated radius outlier removal
844pub async fn gpu_radius_outlier_removal(
845    gpu_context: &GpuContext,
846    cloud: &PointCloud<Point3f>,
847    radius: f32,
848    min_neighbors: usize,
849) -> Result<PointCloud<Point3f>> {
850    let filtered_points = gpu_context.remove_radius_outliers(&cloud.points, radius, min_neighbors).await?;
851    Ok(PointCloud::from_points(filtered_points))
852}
853
854/// GPU-accelerated voxel grid filtering
855pub async fn gpu_voxel_grid_filter(
856    gpu_context: &GpuContext,
857    cloud: &PointCloud<Point3f>,
858    voxel_size: f32,
859) -> Result<PointCloud<Point3f>> {
860    let filtered_points = gpu_context.voxel_grid_filter(&cloud.points, voxel_size).await?;
861    Ok(PointCloud::from_points(filtered_points))
862} 
863
864#[cfg(test)]
865mod tests {
866    use super::*;
867    use threecrate_core::{PointCloud, Point3f};
868    use std::time::Instant;
869
870    async fn try_create_gpu_context() -> Option<GpuContext> {
871        GpuContext::new().await.ok()
872    }
873
874    fn create_test_point_cloud_with_outliers() -> PointCloud<Point3f> {
875        let mut points = Vec::new();
876        
877        // Create a cluster of normal points
878        for i in 0..50 {
879            for j in 0..50 {
880                let x = (i as f32 - 25.0) * 0.1;
881                let y = (j as f32 - 25.0) * 0.1;
882                let z = 0.0;
883                points.push(Point3f::new(x, y, z));
884            }
885        }
886        
887        // Add some outliers far from the cluster
888        for i in 0..20 {
889            let x = 50.0 + (i as f32 * 2.0);
890            let y = 50.0 + (i as f32 * 2.0);
891            let z = 50.0 + (i as f32 * 2.0);
892            points.push(Point3f::new(x, y, z));
893        }
894        
895        PointCloud::from_points(points)
896    }
897
898    fn create_test_point_cloud_no_outliers() -> PointCloud<Point3f> {
899        let mut points = Vec::new();
900        
901        // Create a uniform grid of points
902        for i in 0..25 {
903            for j in 0..25 {
904                for k in 0..5 {
905                    let x = (i as f32 - 12.5) * 0.1;
906                    let y = (j as f32 - 12.5) * 0.1;
907                    let z = (k as f32 - 2.5) * 0.1;
908                    points.push(Point3f::new(x, y, z));
909                }
910            }
911        }
912        
913        PointCloud::from_points(points)
914    }
915
916    #[tokio::test]
917    async fn test_gpu_statistical_outlier_removal_basic() {
918        let gpu_context = match try_create_gpu_context().await {
919            Some(ctx) => ctx,
920            None => {
921                println!("Skipping GPU test - no GPU available");
922                return;
923            }
924        };
925
926        let cloud = create_test_point_cloud_with_outliers();
927        let original_count = cloud.len();
928        
929        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
930        let filtered_count = filtered.len();
931        
932        // Should remove some outliers
933        assert!(filtered_count < original_count);
934        assert!(filtered_count > 0);
935        
936        println!("GPU outlier removal: {} -> {} points", original_count, filtered_count);
937    }
938
939    #[tokio::test]
940    async fn test_gpu_statistical_outlier_removal_no_outliers() {
941        let gpu_context = match try_create_gpu_context().await {
942            Some(ctx) => ctx,
943            None => {
944                println!("Skipping GPU test - no GPU available");
945                return;
946            }
947        };
948
949        let cloud = create_test_point_cloud_no_outliers();
950        let original_count = cloud.len();
951        
952        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
953        let filtered_count = filtered.len();
954        
955        println!("GPU outlier removal (no outliers): {} -> {} points", original_count, filtered_count);
956        
957        // The algorithm should remove some points but not all
958        // Even with no obvious outliers, the algorithm may remove points based on statistical analysis
959        assert!(filtered_count > 0);
960        assert!(filtered_count < original_count);
961    }
962
963    #[tokio::test]
964    async fn test_gpu_statistical_outlier_removal_empty_cloud() {
965        let gpu_context = match try_create_gpu_context().await {
966            Some(ctx) => ctx,
967            None => {
968                println!("Skipping GPU test - no GPU available");
969                return;
970            }
971        };
972
973        let cloud = PointCloud::<Point3f>::new();
974        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
975        
976        assert_eq!(filtered.len(), 0);
977    }
978
979    #[tokio::test]
980    async fn test_gpu_statistical_outlier_removal_single_point() {
981        let gpu_context = match try_create_gpu_context().await {
982            Some(ctx) => ctx,
983            None => {
984                println!("Skipping GPU test - no GPU available");
985                return;
986            }
987        };
988
989        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
990        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 1, 1.0).await.unwrap();
991        
992        // Should keep the single point
993        assert_eq!(filtered.len(), 1);
994    }
995
996    #[tokio::test]
997    async fn test_gpu_vs_cpu_statistical_outlier_removal() {
998        let gpu_context = match try_create_gpu_context().await {
999            Some(ctx) => ctx,
1000            None => {
1001                println!("Skipping GPU vs CPU test - no GPU available");
1002                return;
1003            }
1004        };
1005
1006        let cloud = create_test_point_cloud_with_outliers();
1007        
1008        // CPU version - use the same algorithm logic but avoid type conflicts
1009        let cpu_start = Instant::now();
1010        let cpu_filtered_count = {
1011            // Simple CPU implementation for comparison
1012            let points = &cloud.points;
1013            let k = 10;
1014            let std_dev_multiplier = 1.0;
1015            
1016            if points.is_empty() {
1017                0
1018            } else {
1019                // Simple distance-based outlier detection
1020                let mut mean_distances = Vec::new();
1021                for point in points {
1022                    let mut distances = Vec::new();
1023                    for other_point in points {
1024                        if other_point != point {
1025                            let dx = point.x - other_point.x;
1026                            let dy = point.y - other_point.y;
1027                            let dz = point.z - other_point.z;
1028                            let distance = (dx * dx + dy * dy + dz * dz).sqrt();
1029                            distances.push(distance);
1030                        }
1031                    }
1032                    distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
1033                    let k_neighbors = k.min(distances.len());
1034                    if k_neighbors > 0 {
1035                        let mean = distances[..k_neighbors].iter().sum::<f32>() / k_neighbors as f32;
1036                        mean_distances.push(mean);
1037                    } else {
1038                        mean_distances.push(0.0);
1039                    }
1040                }
1041                
1042                let global_mean = mean_distances.iter().sum::<f32>() / mean_distances.len() as f32;
1043                let variance = mean_distances.iter().map(|&d| (d - global_mean).powi(2)).sum::<f32>() / mean_distances.len() as f32;
1044                let global_std_dev = variance.sqrt();
1045                let threshold = global_mean + std_dev_multiplier * global_std_dev;
1046                
1047                mean_distances.iter().filter(|&&d| d <= threshold).count()
1048            }
1049        };
1050        let cpu_time = cpu_start.elapsed();
1051        
1052        // GPU version
1053        let gpu_start = Instant::now();
1054        let gpu_filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
1055        let gpu_time = gpu_start.elapsed();
1056        
1057        println!("CPU outlier removal: {} -> {} points in {:?}", 
1058                cloud.len(), cpu_filtered_count, cpu_time);
1059        println!("GPU outlier removal: {} -> {} points in {:?}", 
1060                cloud.len(), gpu_filtered.len(), gpu_time);
1061        
1062        // Both should remove outliers (exact counts may vary due to different algorithms)
1063        assert!(cpu_filtered_count < cloud.len());
1064        assert!(gpu_filtered.len() < cloud.len());
1065        
1066        // Performance comparison (GPU should be faster for larger datasets)
1067        if cloud.len() > 1000 {
1068            println!("GPU speedup: {:.2}x", cpu_time.as_secs_f32() / gpu_time.as_secs_f32());
1069        }
1070    }
1071
1072    #[tokio::test]
1073    async fn test_gpu_statistical_outlier_removal_different_parameters() {
1074        let gpu_context = match try_create_gpu_context().await {
1075            Some(ctx) => ctx,
1076            None => {
1077                println!("Skipping GPU test - no GPU available");
1078                return;
1079            }
1080        };
1081
1082        let cloud = create_test_point_cloud_with_outliers();
1083        let original_count = cloud.len();
1084        
1085        // Test different k values
1086        for k in [5, 10, 20] {
1087            let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, k, 1.0).await.unwrap();
1088            println!("k={}: {} -> {} points", k, original_count, filtered.len());
1089            assert!(filtered.len() > 0);
1090        }
1091        
1092        // Test different std_dev_multiplier values
1093        for std_dev in [0.5, 1.0, 2.0] {
1094            let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, std_dev).await.unwrap();
1095            println!("std_dev={}: {} -> {} points", std_dev, original_count, filtered.len());
1096            assert!(filtered.len() > 0);
1097        }
1098    }
1099
1100    #[tokio::test]
1101    async fn test_gpu_statistical_outlier_removal_large_dataset() {
1102        let gpu_context = match try_create_gpu_context().await {
1103            Some(ctx) => ctx,
1104            None => {
1105                println!("Skipping GPU test - no GPU available");
1106                return;
1107            }
1108        };
1109
1110        // Create a smaller dataset for faster testing
1111        let mut points = Vec::new();
1112        for i in 0..50 {
1113            for j in 0..50 {
1114                let x = (i as f32 - 25.0) * 0.1;
1115                let y = (j as f32 - 25.0) * 0.1;
1116                let z = 0.0;
1117                points.push(Point3f::new(x, y, z));
1118            }
1119        }
1120        
1121        // Add some outliers
1122        for i in 0..10 {
1123            let x = 25.0 + (i as f32 * 2.0);
1124            let y = 25.0 + (i as f32 * 2.0);
1125            let z = 25.0 + (i as f32 * 2.0);
1126            points.push(Point3f::new(x, y, z));
1127        }
1128        
1129        let cloud = PointCloud::from_points(points);
1130        let original_count = cloud.len();
1131        
1132        let start = Instant::now();
1133        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 5, 1.0).await.unwrap();
1134        let elapsed = start.elapsed();
1135        
1136        println!("Large dataset GPU outlier removal: {} -> {} points in {:?}", 
1137                original_count, filtered.len(), elapsed);
1138        
1139        assert!(filtered.len() < original_count);
1140        assert!(filtered.len() > 0);
1141        
1142        // Should complete in reasonable time
1143        assert!(elapsed.as_secs() < 10);
1144    }
1145
1146    fn create_test_point_cloud_for_radius_outlier() -> PointCloud<Point3f> {
1147        let mut points = Vec::new();
1148        
1149        // Create a dense cluster
1150        for i in 0..20 {
1151            for j in 0..20 {
1152                let x = (i as f32 - 10.0) * 0.1;
1153                let y = (j as f32 - 10.0) * 0.1;
1154                let z = 0.0;
1155                points.push(Point3f::new(x, y, z));
1156            }
1157        }
1158        
1159        // Add some isolated points (outliers)
1160        for i in 0..5 {
1161            let x = 10.0 + (i as f32 * 2.0);
1162            let y = 10.0 + (i as f32 * 2.0);
1163            let z = 10.0 + (i as f32 * 2.0);
1164            points.push(Point3f::new(x, y, z));
1165        }
1166        
1167        PointCloud::from_points(points)
1168    }
1169
1170    #[tokio::test]
1171    async fn test_gpu_radius_outlier_removal_basic() {
1172        let gpu_context = match try_create_gpu_context().await {
1173            Some(ctx) => ctx,
1174            None => {
1175                println!("Skipping GPU test - no GPU available");
1176                return;
1177            }
1178        };
1179
1180        let cloud = create_test_point_cloud_for_radius_outlier();
1181        let original_count = cloud.len();
1182        
1183        let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 3).await.unwrap();
1184        let filtered_count = filtered.len();
1185        
1186        // Should remove some outliers
1187        assert!(filtered_count < original_count);
1188        assert!(filtered_count > 0);
1189        
1190        println!("GPU radius outlier removal: {} -> {} points", original_count, filtered_count);
1191    }
1192
1193    #[tokio::test]
1194    async fn test_gpu_radius_outlier_removal_empty_cloud() {
1195        let gpu_context = match try_create_gpu_context().await {
1196            Some(ctx) => ctx,
1197            None => {
1198                println!("Skipping GPU test - no GPU available");
1199                return;
1200            }
1201        };
1202
1203        let cloud = PointCloud::<Point3f>::new();
1204        let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 3).await.unwrap();
1205        
1206        assert_eq!(filtered.len(), 0);
1207    }
1208
1209    #[tokio::test]
1210    async fn test_gpu_radius_outlier_removal_single_point() {
1211        let gpu_context = match try_create_gpu_context().await {
1212            Some(ctx) => ctx,
1213            None => {
1214                println!("Skipping GPU test - no GPU available");
1215                return;
1216            }
1217        };
1218
1219        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
1220        let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 1).await.unwrap();
1221        
1222        // Should remove the single point since it has no neighbors
1223        assert_eq!(filtered.len(), 0);
1224    }
1225
1226    #[tokio::test]
1227    async fn test_gpu_radius_outlier_removal_different_parameters() {
1228        let gpu_context = match try_create_gpu_context().await {
1229            Some(ctx) => ctx,
1230            None => {
1231                println!("Skipping GPU test - no GPU available");
1232                return;
1233            }
1234        };
1235
1236        let cloud = create_test_point_cloud_for_radius_outlier();
1237        let original_count = cloud.len();
1238        
1239        // Test different radius values
1240        for radius in [0.2, 0.5, 1.0] {
1241            let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, radius, 3).await.unwrap();
1242            println!("radius={}: {} -> {} points", radius, original_count, filtered.len());
1243            assert!(filtered.len() > 0);
1244        }
1245        
1246        // Test different min_neighbors values
1247        for min_neighbors in [1, 3, 5] {
1248            let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, min_neighbors).await.unwrap();
1249            println!("min_neighbors={}: {} -> {} points", min_neighbors, original_count, filtered.len());
1250            assert!(filtered.len() > 0);
1251        }
1252    }
1253
1254    fn create_test_point_cloud_for_voxel_grid() -> PointCloud<Point3f> {
1255        let mut points = Vec::new();
1256        
1257        // Create a dense grid of points
1258        for i in 0..10 {
1259            for j in 0..10 {
1260                for k in 0..5 {
1261                    let x = (i as f32 - 5.0) * 0.1;
1262                    let y = (j as f32 - 5.0) * 0.1;
1263                    let z = (k as f32 - 2.5) * 0.1;
1264                    points.push(Point3f::new(x, y, z));
1265                }
1266            }
1267        }
1268        
1269        // Add some duplicate points in the same voxels
1270        for i in 0..5 {
1271            for j in 0..5 {
1272                let x = (i as f32 - 2.5) * 0.1;
1273                let y = (j as f32 - 2.5) * 0.1;
1274                let z = 0.0;
1275                points.push(Point3f::new(x, y, z));
1276                points.push(Point3f::new(x, y, z)); // Duplicate
1277            }
1278        }
1279        
1280        PointCloud::from_points(points)
1281    }
1282
1283    #[tokio::test]
1284    async fn test_gpu_voxel_grid_filter_basic() {
1285        let gpu_context = match try_create_gpu_context().await {
1286            Some(ctx) => ctx,
1287            None => {
1288                println!("Skipping GPU test - no GPU available");
1289                return;
1290            }
1291        };
1292
1293        let cloud = create_test_point_cloud_for_voxel_grid();
1294        let original_count = cloud.len();
1295        
1296        let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1297        let filtered_count = filtered.len();
1298        
1299        // Should reduce the number of points by removing duplicates
1300        assert!(filtered_count < original_count);
1301        assert!(filtered_count > 0);
1302        
1303        println!("GPU voxel grid filter: {} -> {} points", original_count, filtered_count);
1304    }
1305
1306    #[tokio::test]
1307    async fn test_gpu_voxel_grid_filter_empty_cloud() {
1308        let gpu_context = match try_create_gpu_context().await {
1309            Some(ctx) => ctx,
1310            None => {
1311                println!("Skipping GPU test - no GPU available");
1312                return;
1313            }
1314        };
1315
1316        let cloud = PointCloud::<Point3f>::new();
1317        let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1318        
1319        assert_eq!(filtered.len(), 0);
1320    }
1321
1322    #[tokio::test]
1323    async fn test_gpu_voxel_grid_filter_single_point() {
1324        let gpu_context = match try_create_gpu_context().await {
1325            Some(ctx) => ctx,
1326            None => {
1327                println!("Skipping GPU test - no GPU available");
1328                return;
1329            }
1330        };
1331
1332        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
1333        let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1334        
1335        // Should keep the single point
1336        assert_eq!(filtered.len(), 1);
1337    }
1338
1339    #[tokio::test]
1340    async fn test_gpu_voxel_grid_filter_different_voxel_sizes() {
1341        let gpu_context = match try_create_gpu_context().await {
1342            Some(ctx) => ctx,
1343            None => {
1344                println!("Skipping GPU test - no GPU available");
1345                return;
1346            }
1347        };
1348
1349        let cloud = create_test_point_cloud_for_voxel_grid();
1350        let original_count = cloud.len();
1351        
1352        // Test different voxel sizes
1353        for voxel_size in [0.05, 0.1, 0.2] {
1354            let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, voxel_size).await.unwrap();
1355            println!("voxel_size={}: {} -> {} points", voxel_size, original_count, filtered.len());
1356            assert!(filtered.len() > 0);
1357            assert!(filtered.len() <= original_count);
1358        }
1359    }
1360
1361    #[tokio::test]
1362    async fn test_gpu_voxel_grid_filter_invalid_voxel_size() {
1363        let gpu_context = match try_create_gpu_context().await {
1364            Some(ctx) => ctx,
1365            None => {
1366                println!("Skipping GPU test - no GPU available");
1367                return;
1368            }
1369        };
1370
1371        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
1372        let result = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.0).await;
1373        assert!(result.is_err());
1374        
1375        let result = gpu_voxel_grid_filter(&gpu_context, &cloud, -1.0).await;
1376        assert!(result.is_err());
1377    }
1378
1379    #[tokio::test]
1380    async fn test_gpu_vs_cpu_performance_comparison() {
1381        let gpu_context = match try_create_gpu_context().await {
1382            Some(ctx) => ctx,
1383            None => {
1384                println!("Skipping GPU vs CPU test - no GPU available");
1385                return;
1386            }
1387        };
1388
1389        let cloud = create_test_point_cloud_for_radius_outlier();
1390        
1391        // GPU radius outlier removal
1392        let gpu_start = Instant::now();
1393        let gpu_filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 3).await.unwrap();
1394        let gpu_time = gpu_start.elapsed();
1395        
1396        // GPU voxel grid filter
1397        let gpu_voxel_start = Instant::now();
1398        let gpu_voxel_filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1399        let gpu_voxel_time = gpu_voxel_start.elapsed();
1400        
1401        println!("GPU radius outlier removal: {} -> {} points in {:?}", 
1402                cloud.len(), gpu_filtered.len(), gpu_time);
1403        println!("GPU voxel grid filter: {} -> {} points in {:?}", 
1404                cloud.len(), gpu_voxel_filtered.len(), gpu_voxel_time);
1405        
1406        // Both should complete successfully
1407        assert!(gpu_filtered.len() > 0);
1408        assert!(gpu_voxel_filtered.len() > 0);
1409        
1410        // Should complete in reasonable time
1411        assert!(gpu_time.as_secs() < 10);
1412        assert!(gpu_voxel_time.as_secs() < 10);
1413    }
1414}