Skip to main content

threecrate_gpu/
filtering.rs

1//! GPU-accelerated filtering
2
3use threecrate_core::{PointCloud, Result, Point3f};
4use crate::GpuContext;
5
6const STATISTICAL_OUTLIER_SHADER: &str = r#"
7struct GpuPoint {
8    position: vec3<f32>,
9    _padding: f32,  // Ensure 16-byte alignment
10}
11
12@group(0) @binding(0) var<storage, read> input_points: array<GpuPoint>;
13@group(0) @binding(1) var<storage, read> neighbors: array<array<u32, MAX_NEIGHBORS>>;
14@group(0) @binding(2) var<storage, read_write> is_outlier: array<u32>;
15@group(0) @binding(3) var<uniform> params: FilterParams;
16
17struct FilterParams {
18    num_points: u32,
19    k_neighbors: u32,
20    std_dev_threshold: f32,
21    mean_distance: f32,
22}
23
24@compute @workgroup_size(64)
25fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
26    let index = global_id.x;
27    if (index >= params.num_points) {
28        return;
29    }
30    
31    let center_point = input_points[index].position;
32    
33    // Compute mean distance to k-nearest neighbors
34    var total_distance = 0.0;
35    var count = 0u;
36    
37    for (var i = 0u; i < params.k_neighbors; i++) {
38        let neighbor_idx = neighbors[index][i];
39        if (neighbor_idx < params.num_points) {
40            let neighbor_point = input_points[neighbor_idx].position;
41            let distance = length(center_point - neighbor_point);
42            total_distance += distance;
43            count++;
44        }
45    }
46    
47    if (count == 0u) {
48        is_outlier[index] = 0u;
49        return;
50    }
51    
52    let mean_distance = total_distance / f32(count);
53    
54    // Mark as outlier if distance is beyond threshold
55    let deviation = abs(mean_distance - params.mean_distance);
56    is_outlier[index] = select(0u, 1u, deviation > params.std_dev_threshold);
57}
58"#;
59
60const RADIUS_OUTLIER_SHADER: &str = r#"
61struct GpuPoint {
62    position: vec3<f32>,
63    _padding: f32,  // Ensure 16-byte alignment
64}
65
66@group(0) @binding(0) var<storage, read> input_points: array<GpuPoint>;
67@group(0) @binding(1) var<storage, read_write> is_outlier: array<u32>;
68@group(0) @binding(2) var<uniform> params: RadiusOutlierParams;
69
70struct RadiusOutlierParams {
71    num_points: u32,
72    radius: f32,
73    min_neighbors: u32,
74    _padding: u32,
75}
76
77@compute @workgroup_size(64)
78fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
79    let index = global_id.x;
80    if (index >= params.num_points) {
81        return;
82    }
83    
84    let center_point = input_points[index].position;
85    var neighbor_count = 0u;
86    
87    // Count neighbors within radius
88    for (var i = 0u; i < params.num_points; i++) {
89        if (i != index) {
90            let neighbor_point = input_points[i].position;
91            let distance = length(center_point - neighbor_point);
92            if (distance <= params.radius) {
93                neighbor_count++;
94            }
95        }
96    }
97    
98    // Mark as outlier if neighbor count is below threshold
99    is_outlier[index] = select(0u, 1u, neighbor_count < params.min_neighbors);
100}
101"#;
102
103const VOXEL_GRID_SHADER: &str = r#"
104struct GpuPoint {
105    position: vec3<f32>,
106    _padding: f32,  // Ensure 16-byte alignment
107}
108
109@group(0) @binding(0) var<storage, read> input_points: array<GpuPoint>;
110@group(0) @binding(1) var<storage, read_write> voxel_indices: array<u32>;
111@group(0) @binding(2) var<uniform> params: VoxelGridParams;
112
113struct VoxelGridParams {
114    num_points: u32,
115    voxel_size: f32,
116    min_x: f32,
117    min_y: f32,
118    min_z: f32,
119    max_x: f32,
120    max_y: f32,
121    max_z: f32,
122}
123
124fn get_voxel_index(point: vec3<f32>, params: VoxelGridParams) -> u32 {
125    // Simple coordinate-based voxel index (similar to CPU version)
126    let voxel_x = i32(floor((point.x - params.min_x) / params.voxel_size));
127    let voxel_y = i32(floor((point.y - params.min_y) / params.voxel_size));
128    let voxel_z = i32(floor((point.z - params.min_z) / params.voxel_size));
129    
130    // Use a simple hash based on coordinates
131    let hash_x = u32(voxel_x * voxel_x + voxel_x);
132    let hash_y = u32(voxel_y * voxel_y + voxel_y * 7);
133    let hash_z = u32(voxel_z * voxel_z + voxel_z * 13);
134    
135    return (hash_x + hash_y * 31u + hash_z * 97u) % 100000u;
136}
137
138@compute @workgroup_size(64)
139fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
140    let index = global_id.x;
141    if (index >= params.num_points) {
142        return;
143    }
144    
145    let point = input_points[index].position;
146    let voxel_idx = get_voxel_index(point, params);
147    
148    // Store voxel index for this point
149    voxel_indices[index] = voxel_idx;
150}
151"#;
152
153impl GpuContext {
154    /// Remove statistical outliers from point cloud using GPU acceleration
155    pub async fn remove_statistical_outliers(
156        &self,
157        points: &[Point3f],
158        k_neighbors: usize,
159        std_dev_multiplier: f32,
160    ) -> Result<Vec<Point3f>> {
161        if points.is_empty() {
162            return Ok(Vec::new());
163        }
164
165        // Convert points to GPU format with proper alignment
166        #[repr(C)]
167        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
168        struct GpuPoint {
169            position: [f32; 3],
170            _padding: f32,
171        }
172        
173        let point_data: Vec<GpuPoint> = points
174            .iter()
175            .map(|p| GpuPoint {
176                position: [p.x, p.y, p.z],
177                _padding: 0.0,
178            })
179            .collect();
180
181        // Convert to the format expected by helper functions
182        let point_data_flat: Vec<[f32; 3]> = point_data.iter().map(|p| p.position).collect();
183        
184        // Compute neighbors (reuse from normals computation)
185        let neighbors = self.compute_neighbors_simple(&point_data_flat, k_neighbors);
186        
187        // Compute global statistics on CPU first (could be moved to GPU)
188        let global_mean = self.compute_global_mean_distance(&point_data_flat, &neighbors, k_neighbors);
189
190        // Create buffers
191        let input_buffer = self.create_buffer_init(
192            "Input Points",
193            &point_data,
194            wgpu::BufferUsages::STORAGE,
195        );
196
197        let neighbors_buffer = self.create_buffer_init(
198            "Neighbors",
199            &neighbors,
200            wgpu::BufferUsages::STORAGE,
201        );
202
203        let outlier_buffer = self.create_buffer(
204            "Outlier Flags",
205            (point_data.len() * std::mem::size_of::<u32>()) as u64,
206            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
207        );
208
209        #[repr(C)]
210        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
211        struct FilterParams {
212            num_points: u32,
213            k_neighbors: u32,
214            std_dev_threshold: f32,
215            mean_distance: f32,
216        }
217
218        let params = FilterParams {
219            num_points: points.len() as u32,
220            k_neighbors: k_neighbors as u32,
221            std_dev_threshold: global_mean * std_dev_multiplier,
222            mean_distance: global_mean,
223        };
224
225        let params_buffer = self.create_buffer_init(
226            "Filter Params",
227            &[params],
228            wgpu::BufferUsages::UNIFORM,
229        );
230
231        // Create shader with MAX_NEIGHBORS constant
232        let shader_source = STATISTICAL_OUTLIER_SHADER.replace("MAX_NEIGHBORS", &k_neighbors.to_string());
233        let shader = self.create_shader_module("Statistical Outlier Filter", &shader_source);
234
235        // Create bind group layout
236        let bind_group_layout = self.create_bind_group_layout(
237            "Outlier Filter",
238            &[
239                wgpu::BindGroupLayoutEntry {
240                    binding: 0,
241                    visibility: wgpu::ShaderStages::COMPUTE,
242                    ty: wgpu::BindingType::Buffer {
243                        ty: wgpu::BufferBindingType::Storage { read_only: true },
244                        has_dynamic_offset: false,
245                        min_binding_size: None,
246                    },
247                    count: None,
248                },
249                wgpu::BindGroupLayoutEntry {
250                    binding: 1,
251                    visibility: wgpu::ShaderStages::COMPUTE,
252                    ty: wgpu::BindingType::Buffer {
253                        ty: wgpu::BufferBindingType::Storage { read_only: true },
254                        has_dynamic_offset: false,
255                        min_binding_size: None,
256                    },
257                    count: None,
258                },
259                wgpu::BindGroupLayoutEntry {
260                    binding: 2,
261                    visibility: wgpu::ShaderStages::COMPUTE,
262                    ty: wgpu::BindingType::Buffer {
263                        ty: wgpu::BufferBindingType::Storage { read_only: false },
264                        has_dynamic_offset: false,
265                        min_binding_size: None,
266                    },
267                    count: None,
268                },
269                wgpu::BindGroupLayoutEntry {
270                    binding: 3,
271                    visibility: wgpu::ShaderStages::COMPUTE,
272                    ty: wgpu::BindingType::Buffer {
273                        ty: wgpu::BufferBindingType::Uniform,
274                        has_dynamic_offset: false,
275                        min_binding_size: None,
276                    },
277                    count: None,
278                },
279            ],
280        );
281
282        // Create compute pipeline
283        let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
284            label: Some("Outlier Filter Pipeline"),
285            layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
286                label: Some("Outlier Filter Layout"),
287                bind_group_layouts: &[&bind_group_layout],
288                immediate_size: 0,
289            })),
290            module: &shader,
291            entry_point: Some("main"),
292            compilation_options: wgpu::PipelineCompilationOptions::default(),
293            cache: None,
294        });
295
296        // Create bind group
297        let bind_group = self.create_bind_group(
298            "Outlier Filter",
299            &bind_group_layout,
300            &[
301                wgpu::BindGroupEntry {
302                    binding: 0,
303                    resource: input_buffer.as_entire_binding(),
304                },
305                wgpu::BindGroupEntry {
306                    binding: 1,
307                    resource: neighbors_buffer.as_entire_binding(),
308                },
309                wgpu::BindGroupEntry {
310                    binding: 2,
311                    resource: outlier_buffer.as_entire_binding(),
312                },
313                wgpu::BindGroupEntry {
314                    binding: 3,
315                    resource: params_buffer.as_entire_binding(),
316                },
317            ],
318        );
319
320        // Execute compute shader
321        let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
322            label: Some("Outlier Filter"),
323        });
324
325        {
326            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
327                label: Some("Outlier Filter Pass"),
328                timestamp_writes: None,
329            });
330            compute_pass.set_pipeline(&pipeline);
331            compute_pass.set_bind_group(0, &bind_group, &[]);
332            let workgroup_count = (points.len() + 63) / 64;
333            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
334        }
335
336        // Read back results
337        let staging_buffer = self.create_buffer(
338            "Outlier Staging",
339            (point_data.len() * std::mem::size_of::<u32>()) as u64,
340            wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
341        );
342
343        encoder.copy_buffer_to_buffer(
344            &outlier_buffer,
345            0,
346            &staging_buffer,
347            0,
348            (point_data.len() * std::mem::size_of::<u32>()) as u64,
349        );
350
351        self.queue.submit(std::iter::once(encoder.finish()));
352
353        // Map and read results
354        let buffer_slice = staging_buffer.slice(..);
355        let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
356        buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
357
358        self.device.poll(wgpu::PollType::Wait {
359            submission_index: None,
360            timeout: None,
361        });
362
363        if let Some(Ok(())) = receiver.receive().await {
364            let data = buffer_slice.get_mapped_range();
365            let outlier_flags: Vec<u32> = bytemuck::cast_slice(&data).to_vec();
366            
367            let filtered_points: Vec<Point3f> = points
368                .iter()
369                .zip(outlier_flags.iter())
370                .filter(|&(_, &is_outlier)| is_outlier == 0)
371                .map(|(point, _)| *point)
372                .collect();
373            
374            drop(data);
375            staging_buffer.unmap();
376            
377            Ok(filtered_points)
378        } else {
379            Err(threecrate_core::Error::Gpu("Failed to read GPU filtering results".to_string()))
380        }
381    }
382
383    /// Compute global mean distance for statistical filtering
384    fn compute_global_mean_distance(&self, points: &[[f32; 3]], neighbors: &[[u32; 64]], k: usize) -> f32 {
385        let k = k.min(64).min(points.len());
386        let mut total_distance = 0.0;
387        let mut count = 0;
388
389        for (i, point) in points.iter().enumerate() {
390            for j in 0..k {
391                let neighbor_idx = neighbors[i][j] as usize;
392                if neighbor_idx < points.len() {
393                    let neighbor_point = &points[neighbor_idx];
394                    let dx = point[0] - neighbor_point[0];
395                    let dy = point[1] - neighbor_point[1];
396                    let dz = point[2] - neighbor_point[2];
397                    let distance = (dx * dx + dy * dy + dz * dz).sqrt();
398                    total_distance += distance;
399                    count += 1;
400                }
401            }
402        }
403
404        if count > 0 {
405            total_distance / count as f32
406        } else {
407            0.0
408        }
409    }
410
411    /// Remove radius outliers from point cloud using GPU acceleration
412    pub async fn remove_radius_outliers(
413        &self,
414        points: &[Point3f],
415        radius: f32,
416        min_neighbors: usize,
417    ) -> Result<Vec<Point3f>> {
418        if points.is_empty() {
419            return Ok(Vec::new());
420        }
421
422        // Convert points to GPU format with proper alignment
423        #[repr(C)]
424        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
425        struct GpuPoint {
426            position: [f32; 3],
427            _padding: f32,
428        }
429        
430        let point_data: Vec<GpuPoint> = points
431            .iter()
432            .map(|p| GpuPoint {
433                position: [p.x, p.y, p.z],
434                _padding: 0.0,
435            })
436            .collect();
437
438        // Create buffers
439        let input_buffer = self.create_buffer_init(
440            "Input Points",
441            &point_data,
442            wgpu::BufferUsages::STORAGE,
443        );
444
445        let outlier_buffer = self.create_buffer(
446            "Outlier Flags",
447            (point_data.len() * std::mem::size_of::<u32>()) as u64,
448            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
449        );
450
451        #[repr(C)]
452        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
453        struct RadiusOutlierParams {
454            num_points: u32,
455            radius: f32,
456            min_neighbors: u32,
457            _padding: u32,
458        }
459
460        let params = RadiusOutlierParams {
461            num_points: points.len() as u32,
462            radius,
463            min_neighbors: min_neighbors as u32,
464            _padding: 0,
465        };
466
467        let params_buffer = self.create_buffer_init(
468            "Radius Outlier Params",
469            &[params],
470            wgpu::BufferUsages::UNIFORM,
471        );
472
473        // Create shader
474        let shader = self.create_shader_module("Radius Outlier Filter", RADIUS_OUTLIER_SHADER);
475
476        // Create bind group layout
477        let bind_group_layout = self.create_bind_group_layout(
478            "Radius Outlier Filter",
479            &[
480                wgpu::BindGroupLayoutEntry {
481                    binding: 0,
482                    visibility: wgpu::ShaderStages::COMPUTE,
483                    ty: wgpu::BindingType::Buffer {
484                        ty: wgpu::BufferBindingType::Storage { read_only: true },
485                        has_dynamic_offset: false,
486                        min_binding_size: None,
487                    },
488                    count: None,
489                },
490                wgpu::BindGroupLayoutEntry {
491                    binding: 1,
492                    visibility: wgpu::ShaderStages::COMPUTE,
493                    ty: wgpu::BindingType::Buffer {
494                        ty: wgpu::BufferBindingType::Storage { read_only: false },
495                        has_dynamic_offset: false,
496                        min_binding_size: None,
497                    },
498                    count: None,
499                },
500                wgpu::BindGroupLayoutEntry {
501                    binding: 2,
502                    visibility: wgpu::ShaderStages::COMPUTE,
503                    ty: wgpu::BindingType::Buffer {
504                        ty: wgpu::BufferBindingType::Uniform,
505                        has_dynamic_offset: false,
506                        min_binding_size: None,
507                    },
508                    count: None,
509                },
510            ],
511        );
512
513        // Create compute pipeline
514        let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
515            label: Some("Radius Outlier Filter Pipeline"),
516            layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
517                label: Some("Radius Outlier Filter Layout"),
518                bind_group_layouts: &[&bind_group_layout],
519                immediate_size: 0,
520            })),
521            module: &shader,
522            entry_point: Some("main"),
523            compilation_options: wgpu::PipelineCompilationOptions::default(),
524            cache: None,
525        });
526
527        // Create bind group
528        let bind_group = self.create_bind_group(
529            "Radius Outlier Filter",
530            &bind_group_layout,
531            &[
532                wgpu::BindGroupEntry {
533                    binding: 0,
534                    resource: input_buffer.as_entire_binding(),
535                },
536                wgpu::BindGroupEntry {
537                    binding: 1,
538                    resource: outlier_buffer.as_entire_binding(),
539                },
540                wgpu::BindGroupEntry {
541                    binding: 2,
542                    resource: params_buffer.as_entire_binding(),
543                },
544            ],
545        );
546
547        // Execute compute shader
548        let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
549            label: Some("Radius Outlier Filter"),
550        });
551
552        {
553            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
554                label: Some("Radius Outlier Filter Pass"),
555                timestamp_writes: None,
556            });
557            compute_pass.set_pipeline(&pipeline);
558            compute_pass.set_bind_group(0, &bind_group, &[]);
559            let workgroup_count = (points.len() + 63) / 64;
560            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
561        }
562
563        // Read back results
564        let staging_buffer = self.create_buffer(
565            "Radius Outlier Staging",
566            (point_data.len() * std::mem::size_of::<u32>()) as u64,
567            wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
568        );
569
570        encoder.copy_buffer_to_buffer(
571            &outlier_buffer,
572            0,
573            &staging_buffer,
574            0,
575            (point_data.len() * std::mem::size_of::<u32>()) as u64,
576        );
577
578        self.queue.submit(std::iter::once(encoder.finish()));
579
580        // Map and read results
581        let buffer_slice = staging_buffer.slice(..);
582        let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
583        buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
584
585        self.device.poll(wgpu::PollType::Wait {
586            submission_index: None,
587            timeout: None,
588        });
589
590        if let Some(Ok(())) = receiver.receive().await {
591            let data = buffer_slice.get_mapped_range();
592            let outlier_flags: Vec<u32> = bytemuck::cast_slice(&data).to_vec();
593            
594            let filtered_points: Vec<Point3f> = points
595                .iter()
596                .zip(outlier_flags.iter())
597                .filter(|&(_, &is_outlier)| is_outlier == 0)
598                .map(|(point, _)| *point)
599                .collect();
600            
601            drop(data);
602            staging_buffer.unmap();
603            
604            Ok(filtered_points)
605        } else {
606            Err(threecrate_core::Error::Gpu("Failed to read GPU radius outlier filtering results".to_string()))
607        }
608    }
609
610    /// Voxel grid filtering using GPU acceleration
611    pub async fn voxel_grid_filter(
612        &self,
613        points: &[Point3f],
614        voxel_size: f32,
615    ) -> Result<Vec<Point3f>> {
616        if points.is_empty() {
617            return Ok(Vec::new());
618        }
619
620        if voxel_size <= 0.0 {
621            return Err(threecrate_core::Error::InvalidData("Voxel size must be positive".to_string()));
622        }
623
624        // Convert points to GPU format
625        #[repr(C)]
626        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
627        struct GpuPoint {
628            position: [f32; 3],
629            _padding: f32,
630        }
631        
632        let point_data: Vec<GpuPoint> = points
633            .iter()
634            .map(|p| GpuPoint {
635                position: [p.x, p.y, p.z],
636                _padding: 0.0,
637            })
638            .collect();
639
640        // Compute grid bounds
641        let min_x = points.iter().map(|p| p.x).min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
642        let min_y = points.iter().map(|p| p.y).min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
643        let min_z = points.iter().map(|p| p.z).min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
644        let max_x = points.iter().map(|p| p.x).max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
645        let max_y = points.iter().map(|p| p.y).max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
646        let max_z = points.iter().map(|p| p.z).max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
647
648        // Estimate total voxels for buffer allocation (we'll use a hash-based approach)
649        let total_voxels = points.len() * 2; // Conservative estimate
650
651        // Create buffers
652        let input_buffer = self.create_buffer_init(
653            "Input Points",
654            &point_data,
655            wgpu::BufferUsages::STORAGE,
656        );
657
658        let voxel_indices_buffer = self.create_buffer(
659            "Voxel Indices",
660            (point_data.len() * std::mem::size_of::<u32>()) as u64,
661            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
662        );
663
664
665
666        #[repr(C)]
667        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
668        struct VoxelGridParams {
669            num_points: u32,
670            voxel_size: f32,
671            min_x: f32,
672            min_y: f32,
673            min_z: f32,
674            max_x: f32,
675            max_y: f32,
676            max_z: f32,
677        }
678
679        let params = VoxelGridParams {
680            num_points: points.len() as u32,
681            voxel_size,
682            min_x,
683            min_y,
684            min_z,
685            max_x,
686            max_y,
687            max_z,
688        };
689
690        let params_buffer = self.create_buffer_init(
691            "Voxel Grid Params",
692            &[params],
693            wgpu::BufferUsages::UNIFORM,
694        );
695
696        // Create shader
697        let shader = self.create_shader_module("Voxel Grid Filter", VOXEL_GRID_SHADER);
698
699        // Create bind group layout
700        let bind_group_layout = self.create_bind_group_layout(
701            "Voxel Grid Filter",
702            &[
703                wgpu::BindGroupLayoutEntry {
704                    binding: 0,
705                    visibility: wgpu::ShaderStages::COMPUTE,
706                    ty: wgpu::BindingType::Buffer {
707                        ty: wgpu::BufferBindingType::Storage { read_only: true },
708                        has_dynamic_offset: false,
709                        min_binding_size: None,
710                    },
711                    count: None,
712                },
713                wgpu::BindGroupLayoutEntry {
714                    binding: 1,
715                    visibility: wgpu::ShaderStages::COMPUTE,
716                    ty: wgpu::BindingType::Buffer {
717                        ty: wgpu::BufferBindingType::Storage { read_only: false },
718                        has_dynamic_offset: false,
719                        min_binding_size: None,
720                    },
721                    count: None,
722                },
723                wgpu::BindGroupLayoutEntry {
724                    binding: 2,
725                    visibility: wgpu::ShaderStages::COMPUTE,
726                    ty: wgpu::BindingType::Buffer {
727                        ty: wgpu::BufferBindingType::Uniform,
728                        has_dynamic_offset: false,
729                        min_binding_size: None,
730                    },
731                    count: None,
732                },
733            ],
734        );
735
736        // Create compute pipeline
737        let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
738            label: Some("Voxel Grid Filter Pipeline"),
739            layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
740                label: Some("Voxel Grid Filter Layout"),
741                bind_group_layouts: &[&bind_group_layout],
742                immediate_size: 0,
743            })),
744            module: &shader,
745            entry_point: Some("main"),
746            compilation_options: wgpu::PipelineCompilationOptions::default(),
747            cache: None,
748        });
749
750        // Create bind group
751        let bind_group = self.create_bind_group(
752            "Voxel Grid Filter",
753            &bind_group_layout,
754            &[
755                wgpu::BindGroupEntry {
756                    binding: 0,
757                    resource: input_buffer.as_entire_binding(),
758                },
759                wgpu::BindGroupEntry {
760                    binding: 1,
761                    resource: voxel_indices_buffer.as_entire_binding(),
762                },
763                wgpu::BindGroupEntry {
764                    binding: 2,
765                    resource: params_buffer.as_entire_binding(),
766                },
767            ],
768        );
769
770        // Execute compute shader
771        let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
772            label: Some("Voxel Grid Filter"),
773        });
774
775        {
776            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
777                label: Some("Voxel Grid Filter Pass"),
778                timestamp_writes: None,
779            });
780            compute_pass.set_pipeline(&pipeline);
781            compute_pass.set_bind_group(0, &bind_group, &[]);
782            let workgroup_count = (points.len() + 63) / 64;
783            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
784        }
785
786        // Read back voxel indices
787        let indices_staging = self.create_buffer(
788            "Voxel Indices Staging",
789            (point_data.len() * std::mem::size_of::<u32>()) as u64,
790            wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
791        );
792
793        encoder.copy_buffer_to_buffer(
794            &voxel_indices_buffer,
795            0,
796            &indices_staging,
797            0,
798            (point_data.len() * std::mem::size_of::<u32>()) as u64,
799        );
800
801        self.queue.submit(std::iter::once(encoder.finish()));
802
803        // Map and read results
804        let indices_slice = indices_staging.slice(..);
805        
806        let (indices_sender, indices_receiver) = futures_intrusive::channel::shared::oneshot_channel();
807        
808        indices_slice.map_async(wgpu::MapMode::Read, move |v| indices_sender.send(v).unwrap());
809
810        self.device.poll(wgpu::PollType::Wait {
811            submission_index: None,
812            timeout: None,
813        });
814
815        if let Some(Ok(())) = indices_receiver.receive().await {
816            let indices_data = indices_slice.get_mapped_range();
817            
818            let voxel_indices: Vec<u32> = bytemuck::cast_slice(&indices_data).to_vec();
819            
820            // Post-process on CPU: keep one point per voxel (the first one)
821            let mut voxel_used = vec![false; total_voxels];
822            let mut filtered_points = Vec::new();
823            
824            for (point_idx, &voxel_idx) in voxel_indices.iter().enumerate() {
825                if voxel_idx < total_voxels as u32 && !voxel_used[voxel_idx as usize] {
826                    filtered_points.push(points[point_idx]);
827                    voxel_used[voxel_idx as usize] = true;
828                }
829            }
830            
831            drop(indices_data);
832            indices_staging.unmap();
833            
834            Ok(filtered_points)
835        } else {
836            Err(threecrate_core::Error::Gpu("Failed to read GPU voxel grid filtering results".to_string()))
837        }
838    }
839}
840
841/// GPU-accelerated statistical outlier removal
842pub async fn gpu_remove_statistical_outliers(
843    gpu_context: &GpuContext,
844    cloud: &PointCloud<Point3f>,
845    k_neighbors: usize,
846    std_dev_multiplier: f32,
847) -> Result<PointCloud<Point3f>> {
848    let filtered_points = gpu_context.remove_statistical_outliers(&cloud.points, k_neighbors, std_dev_multiplier).await?;
849    Ok(PointCloud::from_points(filtered_points))
850}
851
852/// GPU-accelerated radius outlier removal
853pub async fn gpu_radius_outlier_removal(
854    gpu_context: &GpuContext,
855    cloud: &PointCloud<Point3f>,
856    radius: f32,
857    min_neighbors: usize,
858) -> Result<PointCloud<Point3f>> {
859    let filtered_points = gpu_context.remove_radius_outliers(&cloud.points, radius, min_neighbors).await?;
860    Ok(PointCloud::from_points(filtered_points))
861}
862
863/// GPU-accelerated voxel grid filtering
864pub async fn gpu_voxel_grid_filter(
865    gpu_context: &GpuContext,
866    cloud: &PointCloud<Point3f>,
867    voxel_size: f32,
868) -> Result<PointCloud<Point3f>> {
869    let filtered_points = gpu_context.voxel_grid_filter(&cloud.points, voxel_size).await?;
870    Ok(PointCloud::from_points(filtered_points))
871} 
872
873#[cfg(test)]
874mod tests {
875    use super::*;
876    use threecrate_core::{PointCloud, Point3f};
877    use std::time::Instant;
878
879    async fn try_create_gpu_context() -> Option<GpuContext> {
880        GpuContext::new().await.ok()
881    }
882
883    fn create_test_point_cloud_with_outliers() -> PointCloud<Point3f> {
884        let mut points = Vec::new();
885        
886        // Create a cluster of normal points
887        for i in 0..50 {
888            for j in 0..50 {
889                let x = (i as f32 - 25.0) * 0.1;
890                let y = (j as f32 - 25.0) * 0.1;
891                let z = 0.0;
892                points.push(Point3f::new(x, y, z));
893            }
894        }
895        
896        // Add some outliers far from the cluster
897        for i in 0..20 {
898            let x = 50.0 + (i as f32 * 2.0);
899            let y = 50.0 + (i as f32 * 2.0);
900            let z = 50.0 + (i as f32 * 2.0);
901            points.push(Point3f::new(x, y, z));
902        }
903        
904        PointCloud::from_points(points)
905    }
906
907    fn create_test_point_cloud_no_outliers() -> PointCloud<Point3f> {
908        let mut points = Vec::new();
909        
910        // Create a uniform grid of points
911        for i in 0..25 {
912            for j in 0..25 {
913                for k in 0..5 {
914                    let x = (i as f32 - 12.5) * 0.1;
915                    let y = (j as f32 - 12.5) * 0.1;
916                    let z = (k as f32 - 2.5) * 0.1;
917                    points.push(Point3f::new(x, y, z));
918                }
919            }
920        }
921        
922        PointCloud::from_points(points)
923    }
924
925    #[tokio::test]
926    async fn test_gpu_statistical_outlier_removal_basic() {
927        let gpu_context = match try_create_gpu_context().await {
928            Some(ctx) => ctx,
929            None => {
930                println!("Skipping GPU test - no GPU available");
931                return;
932            }
933        };
934
935        let cloud = create_test_point_cloud_with_outliers();
936        let original_count = cloud.len();
937        
938        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
939        let filtered_count = filtered.len();
940        
941        // Should remove some outliers
942        assert!(filtered_count < original_count);
943        assert!(filtered_count > 0);
944        
945        println!("GPU outlier removal: {} -> {} points", original_count, filtered_count);
946    }
947
948    #[tokio::test]
949    async fn test_gpu_statistical_outlier_removal_no_outliers() {
950        let gpu_context = match try_create_gpu_context().await {
951            Some(ctx) => ctx,
952            None => {
953                println!("Skipping GPU test - no GPU available");
954                return;
955            }
956        };
957
958        let cloud = create_test_point_cloud_no_outliers();
959        let original_count = cloud.len();
960        
961        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
962        let filtered_count = filtered.len();
963        
964        println!("GPU outlier removal (no outliers): {} -> {} points", original_count, filtered_count);
965        
966        // The algorithm should remove some points but not all
967        // Even with no obvious outliers, the algorithm may remove points based on statistical analysis
968        assert!(filtered_count > 0);
969        assert!(filtered_count < original_count);
970    }
971
972    #[tokio::test]
973    async fn test_gpu_statistical_outlier_removal_empty_cloud() {
974        let gpu_context = match try_create_gpu_context().await {
975            Some(ctx) => ctx,
976            None => {
977                println!("Skipping GPU test - no GPU available");
978                return;
979            }
980        };
981
982        let cloud = PointCloud::<Point3f>::new();
983        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
984        
985        assert_eq!(filtered.len(), 0);
986    }
987
988    #[tokio::test]
989    async fn test_gpu_statistical_outlier_removal_single_point() {
990        let gpu_context = match try_create_gpu_context().await {
991            Some(ctx) => ctx,
992            None => {
993                println!("Skipping GPU test - no GPU available");
994                return;
995            }
996        };
997
998        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
999        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 1, 1.0).await.unwrap();
1000        
1001        // Should keep the single point
1002        assert_eq!(filtered.len(), 1);
1003    }
1004
1005    #[tokio::test]
1006    async fn test_gpu_vs_cpu_statistical_outlier_removal() {
1007        let gpu_context = match try_create_gpu_context().await {
1008            Some(ctx) => ctx,
1009            None => {
1010                println!("Skipping GPU vs CPU test - no GPU available");
1011                return;
1012            }
1013        };
1014
1015        let cloud = create_test_point_cloud_with_outliers();
1016        
1017        // CPU version - use the same algorithm logic but avoid type conflicts
1018        let cpu_start = Instant::now();
1019        let cpu_filtered_count = {
1020            // Simple CPU implementation for comparison
1021            let points = &cloud.points;
1022            let k = 10;
1023            let std_dev_multiplier = 1.0;
1024            
1025            if points.is_empty() {
1026                0
1027            } else {
1028                // Simple distance-based outlier detection
1029                let mut mean_distances = Vec::new();
1030                for point in points {
1031                    let mut distances = Vec::new();
1032                    for other_point in points {
1033                        if other_point != point {
1034                            let dx = point.x - other_point.x;
1035                            let dy = point.y - other_point.y;
1036                            let dz = point.z - other_point.z;
1037                            let distance = (dx * dx + dy * dy + dz * dz).sqrt();
1038                            distances.push(distance);
1039                        }
1040                    }
1041                    distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
1042                    let k_neighbors = k.min(distances.len());
1043                    if k_neighbors > 0 {
1044                        let mean = distances[..k_neighbors].iter().sum::<f32>() / k_neighbors as f32;
1045                        mean_distances.push(mean);
1046                    } else {
1047                        mean_distances.push(0.0);
1048                    }
1049                }
1050                
1051                let global_mean = mean_distances.iter().sum::<f32>() / mean_distances.len() as f32;
1052                let variance = mean_distances.iter().map(|&d| (d - global_mean).powi(2)).sum::<f32>() / mean_distances.len() as f32;
1053                let global_std_dev = variance.sqrt();
1054                let threshold = global_mean + std_dev_multiplier * global_std_dev;
1055                
1056                mean_distances.iter().filter(|&&d| d <= threshold).count()
1057            }
1058        };
1059        let cpu_time = cpu_start.elapsed();
1060        
1061        // GPU version
1062        let gpu_start = Instant::now();
1063        let gpu_filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
1064        let gpu_time = gpu_start.elapsed();
1065        
1066        println!("CPU outlier removal: {} -> {} points in {:?}", 
1067                cloud.len(), cpu_filtered_count, cpu_time);
1068        println!("GPU outlier removal: {} -> {} points in {:?}", 
1069                cloud.len(), gpu_filtered.len(), gpu_time);
1070        
1071        // Both should remove outliers (exact counts may vary due to different algorithms)
1072        assert!(cpu_filtered_count < cloud.len());
1073        assert!(gpu_filtered.len() < cloud.len());
1074        
1075        // Performance comparison (GPU should be faster for larger datasets)
1076        if cloud.len() > 1000 {
1077            println!("GPU speedup: {:.2}x", cpu_time.as_secs_f32() / gpu_time.as_secs_f32());
1078        }
1079    }
1080
1081    #[tokio::test]
1082    async fn test_gpu_statistical_outlier_removal_different_parameters() {
1083        let gpu_context = match try_create_gpu_context().await {
1084            Some(ctx) => ctx,
1085            None => {
1086                println!("Skipping GPU test - no GPU available");
1087                return;
1088            }
1089        };
1090
1091        let cloud = create_test_point_cloud_with_outliers();
1092        let original_count = cloud.len();
1093        
1094        // Test different k values
1095        for k in [5, 10, 20] {
1096            let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, k, 1.0).await.unwrap();
1097            println!("k={}: {} -> {} points", k, original_count, filtered.len());
1098            assert!(filtered.len() > 0);
1099        }
1100        
1101        // Test different std_dev_multiplier values
1102        for std_dev in [0.5, 1.0, 2.0] {
1103            let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, std_dev).await.unwrap();
1104            println!("std_dev={}: {} -> {} points", std_dev, original_count, filtered.len());
1105            assert!(filtered.len() > 0);
1106        }
1107    }
1108
1109    #[tokio::test]
1110    async fn test_gpu_statistical_outlier_removal_large_dataset() {
1111        let gpu_context = match try_create_gpu_context().await {
1112            Some(ctx) => ctx,
1113            None => {
1114                println!("Skipping GPU test - no GPU available");
1115                return;
1116            }
1117        };
1118
1119        // Create a smaller dataset for faster testing
1120        let mut points = Vec::new();
1121        for i in 0..50 {
1122            for j in 0..50 {
1123                let x = (i as f32 - 25.0) * 0.1;
1124                let y = (j as f32 - 25.0) * 0.1;
1125                let z = 0.0;
1126                points.push(Point3f::new(x, y, z));
1127            }
1128        }
1129        
1130        // Add some outliers
1131        for i in 0..10 {
1132            let x = 25.0 + (i as f32 * 2.0);
1133            let y = 25.0 + (i as f32 * 2.0);
1134            let z = 25.0 + (i as f32 * 2.0);
1135            points.push(Point3f::new(x, y, z));
1136        }
1137        
1138        let cloud = PointCloud::from_points(points);
1139        let original_count = cloud.len();
1140        
1141        let start = Instant::now();
1142        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 5, 1.0).await.unwrap();
1143        let elapsed = start.elapsed();
1144        
1145        println!("Large dataset GPU outlier removal: {} -> {} points in {:?}", 
1146                original_count, filtered.len(), elapsed);
1147        
1148        assert!(filtered.len() < original_count);
1149        assert!(filtered.len() > 0);
1150        
1151        // Should complete in reasonable time
1152        assert!(elapsed.as_secs() < 10);
1153    }
1154
1155    fn create_test_point_cloud_for_radius_outlier() -> PointCloud<Point3f> {
1156        let mut points = Vec::new();
1157        
1158        // Create a dense cluster
1159        for i in 0..20 {
1160            for j in 0..20 {
1161                let x = (i as f32 - 10.0) * 0.1;
1162                let y = (j as f32 - 10.0) * 0.1;
1163                let z = 0.0;
1164                points.push(Point3f::new(x, y, z));
1165            }
1166        }
1167        
1168        // Add some isolated points (outliers)
1169        for i in 0..5 {
1170            let x = 10.0 + (i as f32 * 2.0);
1171            let y = 10.0 + (i as f32 * 2.0);
1172            let z = 10.0 + (i as f32 * 2.0);
1173            points.push(Point3f::new(x, y, z));
1174        }
1175        
1176        PointCloud::from_points(points)
1177    }
1178
1179    #[tokio::test]
1180    async fn test_gpu_radius_outlier_removal_basic() {
1181        let gpu_context = match try_create_gpu_context().await {
1182            Some(ctx) => ctx,
1183            None => {
1184                println!("Skipping GPU test - no GPU available");
1185                return;
1186            }
1187        };
1188
1189        let cloud = create_test_point_cloud_for_radius_outlier();
1190        let original_count = cloud.len();
1191        
1192        let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 3).await.unwrap();
1193        let filtered_count = filtered.len();
1194        
1195        // Should remove some outliers
1196        assert!(filtered_count < original_count);
1197        assert!(filtered_count > 0);
1198        
1199        println!("GPU radius outlier removal: {} -> {} points", original_count, filtered_count);
1200    }
1201
1202    #[tokio::test]
1203    async fn test_gpu_radius_outlier_removal_empty_cloud() {
1204        let gpu_context = match try_create_gpu_context().await {
1205            Some(ctx) => ctx,
1206            None => {
1207                println!("Skipping GPU test - no GPU available");
1208                return;
1209            }
1210        };
1211
1212        let cloud = PointCloud::<Point3f>::new();
1213        let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 3).await.unwrap();
1214        
1215        assert_eq!(filtered.len(), 0);
1216    }
1217
1218    #[tokio::test]
1219    async fn test_gpu_radius_outlier_removal_single_point() {
1220        let gpu_context = match try_create_gpu_context().await {
1221            Some(ctx) => ctx,
1222            None => {
1223                println!("Skipping GPU test - no GPU available");
1224                return;
1225            }
1226        };
1227
1228        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
1229        let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 1).await.unwrap();
1230        
1231        // Should remove the single point since it has no neighbors
1232        assert_eq!(filtered.len(), 0);
1233    }
1234
1235    #[tokio::test]
1236    async fn test_gpu_radius_outlier_removal_different_parameters() {
1237        let gpu_context = match try_create_gpu_context().await {
1238            Some(ctx) => ctx,
1239            None => {
1240                println!("Skipping GPU test - no GPU available");
1241                return;
1242            }
1243        };
1244
1245        let cloud = create_test_point_cloud_for_radius_outlier();
1246        let original_count = cloud.len();
1247        
1248        // Test different radius values
1249        for radius in [0.2, 0.5, 1.0] {
1250            let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, radius, 3).await.unwrap();
1251            println!("radius={}: {} -> {} points", radius, original_count, filtered.len());
1252            assert!(filtered.len() > 0);
1253        }
1254        
1255        // Test different min_neighbors values
1256        for min_neighbors in [1, 3, 5] {
1257            let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, min_neighbors).await.unwrap();
1258            println!("min_neighbors={}: {} -> {} points", min_neighbors, original_count, filtered.len());
1259            assert!(filtered.len() > 0);
1260        }
1261    }
1262
1263    fn create_test_point_cloud_for_voxel_grid() -> PointCloud<Point3f> {
1264        let mut points = Vec::new();
1265        
1266        // Create a dense grid of points
1267        for i in 0..10 {
1268            for j in 0..10 {
1269                for k in 0..5 {
1270                    let x = (i as f32 - 5.0) * 0.1;
1271                    let y = (j as f32 - 5.0) * 0.1;
1272                    let z = (k as f32 - 2.5) * 0.1;
1273                    points.push(Point3f::new(x, y, z));
1274                }
1275            }
1276        }
1277        
1278        // Add some duplicate points in the same voxels
1279        for i in 0..5 {
1280            for j in 0..5 {
1281                let x = (i as f32 - 2.5) * 0.1;
1282                let y = (j as f32 - 2.5) * 0.1;
1283                let z = 0.0;
1284                points.push(Point3f::new(x, y, z));
1285                points.push(Point3f::new(x, y, z)); // Duplicate
1286            }
1287        }
1288        
1289        PointCloud::from_points(points)
1290    }
1291
1292    #[tokio::test]
1293    async fn test_gpu_voxel_grid_filter_basic() {
1294        let gpu_context = match try_create_gpu_context().await {
1295            Some(ctx) => ctx,
1296            None => {
1297                println!("Skipping GPU test - no GPU available");
1298                return;
1299            }
1300        };
1301
1302        let cloud = create_test_point_cloud_for_voxel_grid();
1303        let original_count = cloud.len();
1304        
1305        let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1306        let filtered_count = filtered.len();
1307        
1308        // Should reduce the number of points by removing duplicates
1309        assert!(filtered_count < original_count);
1310        assert!(filtered_count > 0);
1311        
1312        println!("GPU voxel grid filter: {} -> {} points", original_count, filtered_count);
1313    }
1314
1315    #[tokio::test]
1316    async fn test_gpu_voxel_grid_filter_empty_cloud() {
1317        let gpu_context = match try_create_gpu_context().await {
1318            Some(ctx) => ctx,
1319            None => {
1320                println!("Skipping GPU test - no GPU available");
1321                return;
1322            }
1323        };
1324
1325        let cloud = PointCloud::<Point3f>::new();
1326        let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1327        
1328        assert_eq!(filtered.len(), 0);
1329    }
1330
1331    #[tokio::test]
1332    async fn test_gpu_voxel_grid_filter_single_point() {
1333        let gpu_context = match try_create_gpu_context().await {
1334            Some(ctx) => ctx,
1335            None => {
1336                println!("Skipping GPU test - no GPU available");
1337                return;
1338            }
1339        };
1340
1341        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
1342        let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1343        
1344        // Should keep the single point
1345        assert_eq!(filtered.len(), 1);
1346    }
1347
1348    #[tokio::test]
1349    async fn test_gpu_voxel_grid_filter_different_voxel_sizes() {
1350        let gpu_context = match try_create_gpu_context().await {
1351            Some(ctx) => ctx,
1352            None => {
1353                println!("Skipping GPU test - no GPU available");
1354                return;
1355            }
1356        };
1357
1358        let cloud = create_test_point_cloud_for_voxel_grid();
1359        let original_count = cloud.len();
1360        
1361        // Test different voxel sizes
1362        for voxel_size in [0.05, 0.1, 0.2] {
1363            let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, voxel_size).await.unwrap();
1364            println!("voxel_size={}: {} -> {} points", voxel_size, original_count, filtered.len());
1365            assert!(filtered.len() > 0);
1366            assert!(filtered.len() <= original_count);
1367        }
1368    }
1369
1370    #[tokio::test]
1371    async fn test_gpu_voxel_grid_filter_invalid_voxel_size() {
1372        let gpu_context = match try_create_gpu_context().await {
1373            Some(ctx) => ctx,
1374            None => {
1375                println!("Skipping GPU test - no GPU available");
1376                return;
1377            }
1378        };
1379
1380        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
1381        let result = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.0).await;
1382        assert!(result.is_err());
1383        
1384        let result = gpu_voxel_grid_filter(&gpu_context, &cloud, -1.0).await;
1385        assert!(result.is_err());
1386    }
1387
1388    #[tokio::test]
1389    async fn test_gpu_vs_cpu_performance_comparison() {
1390        let gpu_context = match try_create_gpu_context().await {
1391            Some(ctx) => ctx,
1392            None => {
1393                println!("Skipping GPU vs CPU test - no GPU available");
1394                return;
1395            }
1396        };
1397
1398        let cloud = create_test_point_cloud_for_radius_outlier();
1399        
1400        // GPU radius outlier removal
1401        let gpu_start = Instant::now();
1402        let gpu_filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 3).await.unwrap();
1403        let gpu_time = gpu_start.elapsed();
1404        
1405        // GPU voxel grid filter
1406        let gpu_voxel_start = Instant::now();
1407        let gpu_voxel_filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1408        let gpu_voxel_time = gpu_voxel_start.elapsed();
1409        
1410        println!("GPU radius outlier removal: {} -> {} points in {:?}", 
1411                cloud.len(), gpu_filtered.len(), gpu_time);
1412        println!("GPU voxel grid filter: {} -> {} points in {:?}", 
1413                cloud.len(), gpu_voxel_filtered.len(), gpu_voxel_time);
1414        
1415        // Both should complete successfully
1416        assert!(gpu_filtered.len() > 0);
1417        assert!(gpu_voxel_filtered.len() > 0);
1418        
1419        // Should complete in reasonable time
1420        assert!(gpu_time.as_secs() < 10);
1421        assert!(gpu_voxel_time.as_secs() < 10);
1422    }
1423}