threecrate_gpu/
filtering.rs

1//! GPU-accelerated filtering
2
3use threecrate_core::{PointCloud, Result, Point3f};
4use crate::GpuContext;
5
6const STATISTICAL_OUTLIER_SHADER: &str = r#"
7struct GpuPoint {
8    position: vec3<f32>,
9    _padding: f32,  // Ensure 16-byte alignment
10}
11
12@group(0) @binding(0) var<storage, read> input_points: array<GpuPoint>;
13@group(0) @binding(1) var<storage, read> neighbors: array<array<u32, MAX_NEIGHBORS>>;
14@group(0) @binding(2) var<storage, read_write> is_outlier: array<u32>;
15@group(0) @binding(3) var<uniform> params: FilterParams;
16
17struct FilterParams {
18    num_points: u32,
19    k_neighbors: u32,
20    std_dev_threshold: f32,
21    mean_distance: f32,
22}
23
24@compute @workgroup_size(64)
25fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
26    let index = global_id.x;
27    if (index >= params.num_points) {
28        return;
29    }
30    
31    let center_point = input_points[index].position;
32    
33    // Compute mean distance to k-nearest neighbors
34    var total_distance = 0.0;
35    var count = 0u;
36    
37    for (var i = 0u; i < params.k_neighbors; i++) {
38        let neighbor_idx = neighbors[index][i];
39        if (neighbor_idx < params.num_points) {
40            let neighbor_point = input_points[neighbor_idx].position;
41            let distance = length(center_point - neighbor_point);
42            total_distance += distance;
43            count++;
44        }
45    }
46    
47    if (count == 0u) {
48        is_outlier[index] = 0u;
49        return;
50    }
51    
52    let mean_distance = total_distance / f32(count);
53    
54    // Mark as outlier if distance is beyond threshold
55    let deviation = abs(mean_distance - params.mean_distance);
56    is_outlier[index] = select(0u, 1u, deviation > params.std_dev_threshold);
57}
58"#;
59
60const RADIUS_OUTLIER_SHADER: &str = r#"
61struct GpuPoint {
62    position: vec3<f32>,
63    _padding: f32,  // Ensure 16-byte alignment
64}
65
66@group(0) @binding(0) var<storage, read> input_points: array<GpuPoint>;
67@group(0) @binding(1) var<storage, read_write> is_outlier: array<u32>;
68@group(0) @binding(2) var<uniform> params: RadiusOutlierParams;
69
70struct RadiusOutlierParams {
71    num_points: u32,
72    radius: f32,
73    min_neighbors: u32,
74    _padding: u32,
75}
76
77@compute @workgroup_size(64)
78fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
79    let index = global_id.x;
80    if (index >= params.num_points) {
81        return;
82    }
83    
84    let center_point = input_points[index].position;
85    var neighbor_count = 0u;
86    
87    // Count neighbors within radius
88    for (var i = 0u; i < params.num_points; i++) {
89        if (i != index) {
90            let neighbor_point = input_points[i].position;
91            let distance = length(center_point - neighbor_point);
92            if (distance <= params.radius) {
93                neighbor_count++;
94            }
95        }
96    }
97    
98    // Mark as outlier if neighbor count is below threshold
99    is_outlier[index] = select(0u, 1u, neighbor_count < params.min_neighbors);
100}
101"#;
102
103const VOXEL_GRID_SHADER: &str = r#"
104struct GpuPoint {
105    position: vec3<f32>,
106    _padding: f32,  // Ensure 16-byte alignment
107}
108
109@group(0) @binding(0) var<storage, read> input_points: array<GpuPoint>;
110@group(0) @binding(1) var<storage, read_write> voxel_indices: array<u32>;
111@group(0) @binding(2) var<uniform> params: VoxelGridParams;
112
113struct VoxelGridParams {
114    num_points: u32,
115    voxel_size: f32,
116    min_x: f32,
117    min_y: f32,
118    min_z: f32,
119    max_x: f32,
120    max_y: f32,
121    max_z: f32,
122}
123
124fn get_voxel_index(point: vec3<f32>, params: VoxelGridParams) -> u32 {
125    // Simple coordinate-based voxel index (similar to CPU version)
126    let voxel_x = i32(floor((point.x - params.min_x) / params.voxel_size));
127    let voxel_y = i32(floor((point.y - params.min_y) / params.voxel_size));
128    let voxel_z = i32(floor((point.z - params.min_z) / params.voxel_size));
129    
130    // Use a simple hash based on coordinates
131    let hash_x = u32(voxel_x * voxel_x + voxel_x);
132    let hash_y = u32(voxel_y * voxel_y + voxel_y * 7);
133    let hash_z = u32(voxel_z * voxel_z + voxel_z * 13);
134    
135    return (hash_x + hash_y * 31u + hash_z * 97u) % 100000u;
136}
137
138@compute @workgroup_size(64)
139fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
140    let index = global_id.x;
141    if (index >= params.num_points) {
142        return;
143    }
144    
145    let point = input_points[index].position;
146    let voxel_idx = get_voxel_index(point, params);
147    
148    // Store voxel index for this point
149    voxel_indices[index] = voxel_idx;
150}
151"#;
152
153impl GpuContext {
154    /// Remove statistical outliers from point cloud using GPU acceleration
155    pub async fn remove_statistical_outliers(
156        &self,
157        points: &[Point3f],
158        k_neighbors: usize,
159        std_dev_multiplier: f32,
160    ) -> Result<Vec<Point3f>> {
161        if points.is_empty() {
162            return Ok(Vec::new());
163        }
164
165        // Convert points to GPU format with proper alignment
166        #[repr(C)]
167        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
168        struct GpuPoint {
169            position: [f32; 3],
170            _padding: f32,
171        }
172        
173        let point_data: Vec<GpuPoint> = points
174            .iter()
175            .map(|p| GpuPoint {
176                position: [p.x, p.y, p.z],
177                _padding: 0.0,
178            })
179            .collect();
180
181        // Convert to the format expected by helper functions
182        let point_data_flat: Vec<[f32; 3]> = point_data.iter().map(|p| p.position).collect();
183        
184        // Compute neighbors (reuse from normals computation)
185        let neighbors = self.compute_neighbors_simple(&point_data_flat, k_neighbors);
186        
187        // Compute global statistics on CPU first (could be moved to GPU)
188        let global_mean = self.compute_global_mean_distance(&point_data_flat, &neighbors, k_neighbors);
189
190        // Create buffers
191        let input_buffer = self.create_buffer_init(
192            "Input Points",
193            &point_data,
194            wgpu::BufferUsages::STORAGE,
195        );
196
197        let neighbors_buffer = self.create_buffer_init(
198            "Neighbors",
199            &neighbors,
200            wgpu::BufferUsages::STORAGE,
201        );
202
203        let outlier_buffer = self.create_buffer(
204            "Outlier Flags",
205            (point_data.len() * std::mem::size_of::<u32>()) as u64,
206            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
207        );
208
209        #[repr(C)]
210        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
211        struct FilterParams {
212            num_points: u32,
213            k_neighbors: u32,
214            std_dev_threshold: f32,
215            mean_distance: f32,
216        }
217
218        let params = FilterParams {
219            num_points: points.len() as u32,
220            k_neighbors: k_neighbors as u32,
221            std_dev_threshold: global_mean * std_dev_multiplier,
222            mean_distance: global_mean,
223        };
224
225        let params_buffer = self.create_buffer_init(
226            "Filter Params",
227            &[params],
228            wgpu::BufferUsages::UNIFORM,
229        );
230
231        // Create shader with MAX_NEIGHBORS constant
232        let shader_source = STATISTICAL_OUTLIER_SHADER.replace("MAX_NEIGHBORS", &k_neighbors.to_string());
233        let shader = self.create_shader_module("Statistical Outlier Filter", &shader_source);
234
235        // Create bind group layout
236        let bind_group_layout = self.create_bind_group_layout(
237            "Outlier Filter",
238            &[
239                wgpu::BindGroupLayoutEntry {
240                    binding: 0,
241                    visibility: wgpu::ShaderStages::COMPUTE,
242                    ty: wgpu::BindingType::Buffer {
243                        ty: wgpu::BufferBindingType::Storage { read_only: true },
244                        has_dynamic_offset: false,
245                        min_binding_size: None,
246                    },
247                    count: None,
248                },
249                wgpu::BindGroupLayoutEntry {
250                    binding: 1,
251                    visibility: wgpu::ShaderStages::COMPUTE,
252                    ty: wgpu::BindingType::Buffer {
253                        ty: wgpu::BufferBindingType::Storage { read_only: true },
254                        has_dynamic_offset: false,
255                        min_binding_size: None,
256                    },
257                    count: None,
258                },
259                wgpu::BindGroupLayoutEntry {
260                    binding: 2,
261                    visibility: wgpu::ShaderStages::COMPUTE,
262                    ty: wgpu::BindingType::Buffer {
263                        ty: wgpu::BufferBindingType::Storage { read_only: false },
264                        has_dynamic_offset: false,
265                        min_binding_size: None,
266                    },
267                    count: None,
268                },
269                wgpu::BindGroupLayoutEntry {
270                    binding: 3,
271                    visibility: wgpu::ShaderStages::COMPUTE,
272                    ty: wgpu::BindingType::Buffer {
273                        ty: wgpu::BufferBindingType::Uniform,
274                        has_dynamic_offset: false,
275                        min_binding_size: None,
276                    },
277                    count: None,
278                },
279            ],
280        );
281
282        // Create compute pipeline
283        let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
284            label: Some("Outlier Filter Pipeline"),
285            layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
286                label: Some("Outlier Filter Layout"),
287                bind_group_layouts: &[&bind_group_layout],
288                push_constant_ranges: &[],
289            })),
290            module: &shader,
291            entry_point: "main",
292            compilation_options: wgpu::PipelineCompilationOptions::default(),
293        });
294
295        // Create bind group
296        let bind_group = self.create_bind_group(
297            "Outlier Filter",
298            &bind_group_layout,
299            &[
300                wgpu::BindGroupEntry {
301                    binding: 0,
302                    resource: input_buffer.as_entire_binding(),
303                },
304                wgpu::BindGroupEntry {
305                    binding: 1,
306                    resource: neighbors_buffer.as_entire_binding(),
307                },
308                wgpu::BindGroupEntry {
309                    binding: 2,
310                    resource: outlier_buffer.as_entire_binding(),
311                },
312                wgpu::BindGroupEntry {
313                    binding: 3,
314                    resource: params_buffer.as_entire_binding(),
315                },
316            ],
317        );
318
319        // Execute compute shader
320        let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
321            label: Some("Outlier Filter"),
322        });
323
324        {
325            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
326                label: Some("Outlier Filter Pass"),
327                timestamp_writes: None,
328            });
329            compute_pass.set_pipeline(&pipeline);
330            compute_pass.set_bind_group(0, &bind_group, &[]);
331            let workgroup_count = (points.len() + 63) / 64;
332            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
333        }
334
335        // Read back results
336        let staging_buffer = self.create_buffer(
337            "Outlier Staging",
338            (point_data.len() * std::mem::size_of::<u32>()) as u64,
339            wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
340        );
341
342        encoder.copy_buffer_to_buffer(
343            &outlier_buffer,
344            0,
345            &staging_buffer,
346            0,
347            (point_data.len() * std::mem::size_of::<u32>()) as u64,
348        );
349
350        self.queue.submit(std::iter::once(encoder.finish()));
351
352        // Map and read results
353        let buffer_slice = staging_buffer.slice(..);
354        let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
355        buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
356
357        self.device.poll(wgpu::Maintain::wait()).panic_on_timeout();
358
359        if let Some(Ok(())) = receiver.receive().await {
360            let data = buffer_slice.get_mapped_range();
361            let outlier_flags: Vec<u32> = bytemuck::cast_slice(&data).to_vec();
362            
363            let filtered_points: Vec<Point3f> = points
364                .iter()
365                .zip(outlier_flags.iter())
366                .filter(|&(_, &is_outlier)| is_outlier == 0)
367                .map(|(point, _)| *point)
368                .collect();
369            
370            drop(data);
371            staging_buffer.unmap();
372            
373            Ok(filtered_points)
374        } else {
375            Err(threecrate_core::Error::Gpu("Failed to read GPU filtering results".to_string()))
376        }
377    }
378
379    /// Compute global mean distance for statistical filtering
380    fn compute_global_mean_distance(&self, points: &[[f32; 3]], neighbors: &[[u32; 64]], k: usize) -> f32 {
381        let k = k.min(64).min(points.len());
382        let mut total_distance = 0.0;
383        let mut count = 0;
384
385        for (i, point) in points.iter().enumerate() {
386            for j in 0..k {
387                let neighbor_idx = neighbors[i][j] as usize;
388                if neighbor_idx < points.len() {
389                    let neighbor_point = &points[neighbor_idx];
390                    let dx = point[0] - neighbor_point[0];
391                    let dy = point[1] - neighbor_point[1];
392                    let dz = point[2] - neighbor_point[2];
393                    let distance = (dx * dx + dy * dy + dz * dz).sqrt();
394                    total_distance += distance;
395                    count += 1;
396                }
397            }
398        }
399
400        if count > 0 {
401            total_distance / count as f32
402        } else {
403            0.0
404        }
405    }
406
407    /// Remove radius outliers from point cloud using GPU acceleration
408    pub async fn remove_radius_outliers(
409        &self,
410        points: &[Point3f],
411        radius: f32,
412        min_neighbors: usize,
413    ) -> Result<Vec<Point3f>> {
414        if points.is_empty() {
415            return Ok(Vec::new());
416        }
417
418        // Convert points to GPU format with proper alignment
419        #[repr(C)]
420        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
421        struct GpuPoint {
422            position: [f32; 3],
423            _padding: f32,
424        }
425        
426        let point_data: Vec<GpuPoint> = points
427            .iter()
428            .map(|p| GpuPoint {
429                position: [p.x, p.y, p.z],
430                _padding: 0.0,
431            })
432            .collect();
433
434        // Create buffers
435        let input_buffer = self.create_buffer_init(
436            "Input Points",
437            &point_data,
438            wgpu::BufferUsages::STORAGE,
439        );
440
441        let outlier_buffer = self.create_buffer(
442            "Outlier Flags",
443            (point_data.len() * std::mem::size_of::<u32>()) as u64,
444            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
445        );
446
447        #[repr(C)]
448        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
449        struct RadiusOutlierParams {
450            num_points: u32,
451            radius: f32,
452            min_neighbors: u32,
453            _padding: u32,
454        }
455
456        let params = RadiusOutlierParams {
457            num_points: points.len() as u32,
458            radius,
459            min_neighbors: min_neighbors as u32,
460            _padding: 0,
461        };
462
463        let params_buffer = self.create_buffer_init(
464            "Radius Outlier Params",
465            &[params],
466            wgpu::BufferUsages::UNIFORM,
467        );
468
469        // Create shader
470        let shader = self.create_shader_module("Radius Outlier Filter", RADIUS_OUTLIER_SHADER);
471
472        // Create bind group layout
473        let bind_group_layout = self.create_bind_group_layout(
474            "Radius Outlier Filter",
475            &[
476                wgpu::BindGroupLayoutEntry {
477                    binding: 0,
478                    visibility: wgpu::ShaderStages::COMPUTE,
479                    ty: wgpu::BindingType::Buffer {
480                        ty: wgpu::BufferBindingType::Storage { read_only: true },
481                        has_dynamic_offset: false,
482                        min_binding_size: None,
483                    },
484                    count: None,
485                },
486                wgpu::BindGroupLayoutEntry {
487                    binding: 1,
488                    visibility: wgpu::ShaderStages::COMPUTE,
489                    ty: wgpu::BindingType::Buffer {
490                        ty: wgpu::BufferBindingType::Storage { read_only: false },
491                        has_dynamic_offset: false,
492                        min_binding_size: None,
493                    },
494                    count: None,
495                },
496                wgpu::BindGroupLayoutEntry {
497                    binding: 2,
498                    visibility: wgpu::ShaderStages::COMPUTE,
499                    ty: wgpu::BindingType::Buffer {
500                        ty: wgpu::BufferBindingType::Uniform,
501                        has_dynamic_offset: false,
502                        min_binding_size: None,
503                    },
504                    count: None,
505                },
506            ],
507        );
508
509        // Create compute pipeline
510        let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
511            label: Some("Radius Outlier Filter Pipeline"),
512            layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
513                label: Some("Radius Outlier Filter Layout"),
514                bind_group_layouts: &[&bind_group_layout],
515                push_constant_ranges: &[],
516            })),
517            module: &shader,
518            entry_point: "main",
519            compilation_options: wgpu::PipelineCompilationOptions::default(),
520        });
521
522        // Create bind group
523        let bind_group = self.create_bind_group(
524            "Radius Outlier Filter",
525            &bind_group_layout,
526            &[
527                wgpu::BindGroupEntry {
528                    binding: 0,
529                    resource: input_buffer.as_entire_binding(),
530                },
531                wgpu::BindGroupEntry {
532                    binding: 1,
533                    resource: outlier_buffer.as_entire_binding(),
534                },
535                wgpu::BindGroupEntry {
536                    binding: 2,
537                    resource: params_buffer.as_entire_binding(),
538                },
539            ],
540        );
541
542        // Execute compute shader
543        let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
544            label: Some("Radius Outlier Filter"),
545        });
546
547        {
548            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
549                label: Some("Radius Outlier Filter Pass"),
550                timestamp_writes: None,
551            });
552            compute_pass.set_pipeline(&pipeline);
553            compute_pass.set_bind_group(0, &bind_group, &[]);
554            let workgroup_count = (points.len() + 63) / 64;
555            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
556        }
557
558        // Read back results
559        let staging_buffer = self.create_buffer(
560            "Radius Outlier Staging",
561            (point_data.len() * std::mem::size_of::<u32>()) as u64,
562            wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
563        );
564
565        encoder.copy_buffer_to_buffer(
566            &outlier_buffer,
567            0,
568            &staging_buffer,
569            0,
570            (point_data.len() * std::mem::size_of::<u32>()) as u64,
571        );
572
573        self.queue.submit(std::iter::once(encoder.finish()));
574
575        // Map and read results
576        let buffer_slice = staging_buffer.slice(..);
577        let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
578        buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
579
580        self.device.poll(wgpu::Maintain::wait()).panic_on_timeout();
581
582        if let Some(Ok(())) = receiver.receive().await {
583            let data = buffer_slice.get_mapped_range();
584            let outlier_flags: Vec<u32> = bytemuck::cast_slice(&data).to_vec();
585            
586            let filtered_points: Vec<Point3f> = points
587                .iter()
588                .zip(outlier_flags.iter())
589                .filter(|&(_, &is_outlier)| is_outlier == 0)
590                .map(|(point, _)| *point)
591                .collect();
592            
593            drop(data);
594            staging_buffer.unmap();
595            
596            Ok(filtered_points)
597        } else {
598            Err(threecrate_core::Error::Gpu("Failed to read GPU radius outlier filtering results".to_string()))
599        }
600    }
601
602    /// Voxel grid filtering using GPU acceleration
603    pub async fn voxel_grid_filter(
604        &self,
605        points: &[Point3f],
606        voxel_size: f32,
607    ) -> Result<Vec<Point3f>> {
608        if points.is_empty() {
609            return Ok(Vec::new());
610        }
611
612        if voxel_size <= 0.0 {
613            return Err(threecrate_core::Error::InvalidData("Voxel size must be positive".to_string()));
614        }
615
616        // Convert points to GPU format
617        #[repr(C)]
618        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
619        struct GpuPoint {
620            position: [f32; 3],
621            _padding: f32,
622        }
623        
624        let point_data: Vec<GpuPoint> = points
625            .iter()
626            .map(|p| GpuPoint {
627                position: [p.x, p.y, p.z],
628                _padding: 0.0,
629            })
630            .collect();
631
632        // Compute grid bounds
633        let min_x = points.iter().map(|p| p.x).min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
634        let min_y = points.iter().map(|p| p.y).min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
635        let min_z = points.iter().map(|p| p.z).min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
636        let max_x = points.iter().map(|p| p.x).max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
637        let max_y = points.iter().map(|p| p.y).max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
638        let max_z = points.iter().map(|p| p.z).max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
639
640        // Estimate total voxels for buffer allocation (we'll use a hash-based approach)
641        let total_voxels = points.len() * 2; // Conservative estimate
642
643        // Create buffers
644        let input_buffer = self.create_buffer_init(
645            "Input Points",
646            &point_data,
647            wgpu::BufferUsages::STORAGE,
648        );
649
650        let voxel_indices_buffer = self.create_buffer(
651            "Voxel Indices",
652            (point_data.len() * std::mem::size_of::<u32>()) as u64,
653            wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
654        );
655
656
657
658        #[repr(C)]
659        #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
660        struct VoxelGridParams {
661            num_points: u32,
662            voxel_size: f32,
663            min_x: f32,
664            min_y: f32,
665            min_z: f32,
666            max_x: f32,
667            max_y: f32,
668            max_z: f32,
669        }
670
671        let params = VoxelGridParams {
672            num_points: points.len() as u32,
673            voxel_size,
674            min_x,
675            min_y,
676            min_z,
677            max_x,
678            max_y,
679            max_z,
680        };
681
682        let params_buffer = self.create_buffer_init(
683            "Voxel Grid Params",
684            &[params],
685            wgpu::BufferUsages::UNIFORM,
686        );
687
688        // Create shader
689        let shader = self.create_shader_module("Voxel Grid Filter", VOXEL_GRID_SHADER);
690
691        // Create bind group layout
692        let bind_group_layout = self.create_bind_group_layout(
693            "Voxel Grid Filter",
694            &[
695                wgpu::BindGroupLayoutEntry {
696                    binding: 0,
697                    visibility: wgpu::ShaderStages::COMPUTE,
698                    ty: wgpu::BindingType::Buffer {
699                        ty: wgpu::BufferBindingType::Storage { read_only: true },
700                        has_dynamic_offset: false,
701                        min_binding_size: None,
702                    },
703                    count: None,
704                },
705                wgpu::BindGroupLayoutEntry {
706                    binding: 1,
707                    visibility: wgpu::ShaderStages::COMPUTE,
708                    ty: wgpu::BindingType::Buffer {
709                        ty: wgpu::BufferBindingType::Storage { read_only: false },
710                        has_dynamic_offset: false,
711                        min_binding_size: None,
712                    },
713                    count: None,
714                },
715                wgpu::BindGroupLayoutEntry {
716                    binding: 2,
717                    visibility: wgpu::ShaderStages::COMPUTE,
718                    ty: wgpu::BindingType::Buffer {
719                        ty: wgpu::BufferBindingType::Uniform,
720                        has_dynamic_offset: false,
721                        min_binding_size: None,
722                    },
723                    count: None,
724                },
725            ],
726        );
727
728        // Create compute pipeline
729        let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
730            label: Some("Voxel Grid Filter Pipeline"),
731            layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
732                label: Some("Voxel Grid Filter Layout"),
733                bind_group_layouts: &[&bind_group_layout],
734                push_constant_ranges: &[],
735            })),
736            module: &shader,
737            entry_point: "main",
738            compilation_options: wgpu::PipelineCompilationOptions::default(),
739        });
740
741        // Create bind group
742        let bind_group = self.create_bind_group(
743            "Voxel Grid Filter",
744            &bind_group_layout,
745            &[
746                wgpu::BindGroupEntry {
747                    binding: 0,
748                    resource: input_buffer.as_entire_binding(),
749                },
750                wgpu::BindGroupEntry {
751                    binding: 1,
752                    resource: voxel_indices_buffer.as_entire_binding(),
753                },
754                wgpu::BindGroupEntry {
755                    binding: 2,
756                    resource: params_buffer.as_entire_binding(),
757                },
758            ],
759        );
760
761        // Execute compute shader
762        let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
763            label: Some("Voxel Grid Filter"),
764        });
765
766        {
767            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
768                label: Some("Voxel Grid Filter Pass"),
769                timestamp_writes: None,
770            });
771            compute_pass.set_pipeline(&pipeline);
772            compute_pass.set_bind_group(0, &bind_group, &[]);
773            let workgroup_count = (points.len() + 63) / 64;
774            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
775        }
776
777        // Read back voxel indices
778        let indices_staging = self.create_buffer(
779            "Voxel Indices Staging",
780            (point_data.len() * std::mem::size_of::<u32>()) as u64,
781            wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
782        );
783
784        encoder.copy_buffer_to_buffer(
785            &voxel_indices_buffer,
786            0,
787            &indices_staging,
788            0,
789            (point_data.len() * std::mem::size_of::<u32>()) as u64,
790        );
791
792        self.queue.submit(std::iter::once(encoder.finish()));
793
794        // Map and read results
795        let indices_slice = indices_staging.slice(..);
796        
797        let (indices_sender, indices_receiver) = futures_intrusive::channel::shared::oneshot_channel();
798        
799        indices_slice.map_async(wgpu::MapMode::Read, move |v| indices_sender.send(v).unwrap());
800
801        self.device.poll(wgpu::Maintain::wait()).panic_on_timeout();
802
803        if let Some(Ok(())) = indices_receiver.receive().await {
804            let indices_data = indices_slice.get_mapped_range();
805            
806            let voxel_indices: Vec<u32> = bytemuck::cast_slice(&indices_data).to_vec();
807            
808            // Post-process on CPU: keep one point per voxel (the first one)
809            let mut voxel_used = vec![false; total_voxels];
810            let mut filtered_points = Vec::new();
811            
812            for (point_idx, &voxel_idx) in voxel_indices.iter().enumerate() {
813                if voxel_idx < total_voxels as u32 && !voxel_used[voxel_idx as usize] {
814                    filtered_points.push(points[point_idx]);
815                    voxel_used[voxel_idx as usize] = true;
816                }
817            }
818            
819            drop(indices_data);
820            indices_staging.unmap();
821            
822            Ok(filtered_points)
823        } else {
824            Err(threecrate_core::Error::Gpu("Failed to read GPU voxel grid filtering results".to_string()))
825        }
826    }
827}
828
829/// GPU-accelerated statistical outlier removal
830pub async fn gpu_remove_statistical_outliers(
831    gpu_context: &GpuContext,
832    cloud: &PointCloud<Point3f>,
833    k_neighbors: usize,
834    std_dev_multiplier: f32,
835) -> Result<PointCloud<Point3f>> {
836    let filtered_points = gpu_context.remove_statistical_outliers(&cloud.points, k_neighbors, std_dev_multiplier).await?;
837    Ok(PointCloud::from_points(filtered_points))
838}
839
840/// GPU-accelerated radius outlier removal
841pub async fn gpu_radius_outlier_removal(
842    gpu_context: &GpuContext,
843    cloud: &PointCloud<Point3f>,
844    radius: f32,
845    min_neighbors: usize,
846) -> Result<PointCloud<Point3f>> {
847    let filtered_points = gpu_context.remove_radius_outliers(&cloud.points, radius, min_neighbors).await?;
848    Ok(PointCloud::from_points(filtered_points))
849}
850
851/// GPU-accelerated voxel grid filtering
852pub async fn gpu_voxel_grid_filter(
853    gpu_context: &GpuContext,
854    cloud: &PointCloud<Point3f>,
855    voxel_size: f32,
856) -> Result<PointCloud<Point3f>> {
857    let filtered_points = gpu_context.voxel_grid_filter(&cloud.points, voxel_size).await?;
858    Ok(PointCloud::from_points(filtered_points))
859} 
860
861#[cfg(test)]
862mod tests {
863    use super::*;
864    use threecrate_core::{PointCloud, Point3f};
865    use std::time::Instant;
866
867    async fn try_create_gpu_context() -> Option<GpuContext> {
868        GpuContext::new().await.ok()
869    }
870
871    fn create_test_point_cloud_with_outliers() -> PointCloud<Point3f> {
872        let mut points = Vec::new();
873        
874        // Create a cluster of normal points
875        for i in 0..50 {
876            for j in 0..50 {
877                let x = (i as f32 - 25.0) * 0.1;
878                let y = (j as f32 - 25.0) * 0.1;
879                let z = 0.0;
880                points.push(Point3f::new(x, y, z));
881            }
882        }
883        
884        // Add some outliers far from the cluster
885        for i in 0..20 {
886            let x = 50.0 + (i as f32 * 2.0);
887            let y = 50.0 + (i as f32 * 2.0);
888            let z = 50.0 + (i as f32 * 2.0);
889            points.push(Point3f::new(x, y, z));
890        }
891        
892        PointCloud::from_points(points)
893    }
894
895    fn create_test_point_cloud_no_outliers() -> PointCloud<Point3f> {
896        let mut points = Vec::new();
897        
898        // Create a uniform grid of points
899        for i in 0..25 {
900            for j in 0..25 {
901                for k in 0..5 {
902                    let x = (i as f32 - 12.5) * 0.1;
903                    let y = (j as f32 - 12.5) * 0.1;
904                    let z = (k as f32 - 2.5) * 0.1;
905                    points.push(Point3f::new(x, y, z));
906                }
907            }
908        }
909        
910        PointCloud::from_points(points)
911    }
912
913    #[tokio::test]
914    async fn test_gpu_statistical_outlier_removal_basic() {
915        let gpu_context = match try_create_gpu_context().await {
916            Some(ctx) => ctx,
917            None => {
918                println!("Skipping GPU test - no GPU available");
919                return;
920            }
921        };
922
923        let cloud = create_test_point_cloud_with_outliers();
924        let original_count = cloud.len();
925        
926        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
927        let filtered_count = filtered.len();
928        
929        // Should remove some outliers
930        assert!(filtered_count < original_count);
931        assert!(filtered_count > 0);
932        
933        println!("GPU outlier removal: {} -> {} points", original_count, filtered_count);
934    }
935
936    #[tokio::test]
937    async fn test_gpu_statistical_outlier_removal_no_outliers() {
938        let gpu_context = match try_create_gpu_context().await {
939            Some(ctx) => ctx,
940            None => {
941                println!("Skipping GPU test - no GPU available");
942                return;
943            }
944        };
945
946        let cloud = create_test_point_cloud_no_outliers();
947        let original_count = cloud.len();
948        
949        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
950        let filtered_count = filtered.len();
951        
952        println!("GPU outlier removal (no outliers): {} -> {} points", original_count, filtered_count);
953        
954        // The algorithm should remove some points but not all
955        // Even with no obvious outliers, the algorithm may remove points based on statistical analysis
956        assert!(filtered_count > 0);
957        assert!(filtered_count < original_count);
958    }
959
960    #[tokio::test]
961    async fn test_gpu_statistical_outlier_removal_empty_cloud() {
962        let gpu_context = match try_create_gpu_context().await {
963            Some(ctx) => ctx,
964            None => {
965                println!("Skipping GPU test - no GPU available");
966                return;
967            }
968        };
969
970        let cloud = PointCloud::<Point3f>::new();
971        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
972        
973        assert_eq!(filtered.len(), 0);
974    }
975
976    #[tokio::test]
977    async fn test_gpu_statistical_outlier_removal_single_point() {
978        let gpu_context = match try_create_gpu_context().await {
979            Some(ctx) => ctx,
980            None => {
981                println!("Skipping GPU test - no GPU available");
982                return;
983            }
984        };
985
986        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
987        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 1, 1.0).await.unwrap();
988        
989        // Should keep the single point
990        assert_eq!(filtered.len(), 1);
991    }
992
993    #[tokio::test]
994    async fn test_gpu_vs_cpu_statistical_outlier_removal() {
995        let gpu_context = match try_create_gpu_context().await {
996            Some(ctx) => ctx,
997            None => {
998                println!("Skipping GPU vs CPU test - no GPU available");
999                return;
1000            }
1001        };
1002
1003        let cloud = create_test_point_cloud_with_outliers();
1004        
1005        // CPU version - use the same algorithm logic but avoid type conflicts
1006        let cpu_start = Instant::now();
1007        let cpu_filtered_count = {
1008            // Simple CPU implementation for comparison
1009            let points = &cloud.points;
1010            let k = 10;
1011            let std_dev_multiplier = 1.0;
1012            
1013            if points.is_empty() {
1014                0
1015            } else {
1016                // Simple distance-based outlier detection
1017                let mut mean_distances = Vec::new();
1018                for point in points {
1019                    let mut distances = Vec::new();
1020                    for other_point in points {
1021                        if other_point != point {
1022                            let dx = point.x - other_point.x;
1023                            let dy = point.y - other_point.y;
1024                            let dz = point.z - other_point.z;
1025                            let distance = (dx * dx + dy * dy + dz * dz).sqrt();
1026                            distances.push(distance);
1027                        }
1028                    }
1029                    distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
1030                    let k_neighbors = k.min(distances.len());
1031                    if k_neighbors > 0 {
1032                        let mean = distances[..k_neighbors].iter().sum::<f32>() / k_neighbors as f32;
1033                        mean_distances.push(mean);
1034                    } else {
1035                        mean_distances.push(0.0);
1036                    }
1037                }
1038                
1039                let global_mean = mean_distances.iter().sum::<f32>() / mean_distances.len() as f32;
1040                let variance = mean_distances.iter().map(|&d| (d - global_mean).powi(2)).sum::<f32>() / mean_distances.len() as f32;
1041                let global_std_dev = variance.sqrt();
1042                let threshold = global_mean + std_dev_multiplier * global_std_dev;
1043                
1044                mean_distances.iter().filter(|&&d| d <= threshold).count()
1045            }
1046        };
1047        let cpu_time = cpu_start.elapsed();
1048        
1049        // GPU version
1050        let gpu_start = Instant::now();
1051        let gpu_filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, 1.0).await.unwrap();
1052        let gpu_time = gpu_start.elapsed();
1053        
1054        println!("CPU outlier removal: {} -> {} points in {:?}", 
1055                cloud.len(), cpu_filtered_count, cpu_time);
1056        println!("GPU outlier removal: {} -> {} points in {:?}", 
1057                cloud.len(), gpu_filtered.len(), gpu_time);
1058        
1059        // Both should remove outliers (exact counts may vary due to different algorithms)
1060        assert!(cpu_filtered_count < cloud.len());
1061        assert!(gpu_filtered.len() < cloud.len());
1062        
1063        // Performance comparison (GPU should be faster for larger datasets)
1064        if cloud.len() > 1000 {
1065            println!("GPU speedup: {:.2}x", cpu_time.as_secs_f32() / gpu_time.as_secs_f32());
1066        }
1067    }
1068
1069    #[tokio::test]
1070    async fn test_gpu_statistical_outlier_removal_different_parameters() {
1071        let gpu_context = match try_create_gpu_context().await {
1072            Some(ctx) => ctx,
1073            None => {
1074                println!("Skipping GPU test - no GPU available");
1075                return;
1076            }
1077        };
1078
1079        let cloud = create_test_point_cloud_with_outliers();
1080        let original_count = cloud.len();
1081        
1082        // Test different k values
1083        for k in [5, 10, 20] {
1084            let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, k, 1.0).await.unwrap();
1085            println!("k={}: {} -> {} points", k, original_count, filtered.len());
1086            assert!(filtered.len() > 0);
1087        }
1088        
1089        // Test different std_dev_multiplier values
1090        for std_dev in [0.5, 1.0, 2.0] {
1091            let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 10, std_dev).await.unwrap();
1092            println!("std_dev={}: {} -> {} points", std_dev, original_count, filtered.len());
1093            assert!(filtered.len() > 0);
1094        }
1095    }
1096
1097    #[tokio::test]
1098    async fn test_gpu_statistical_outlier_removal_large_dataset() {
1099        let gpu_context = match try_create_gpu_context().await {
1100            Some(ctx) => ctx,
1101            None => {
1102                println!("Skipping GPU test - no GPU available");
1103                return;
1104            }
1105        };
1106
1107        // Create a smaller dataset for faster testing
1108        let mut points = Vec::new();
1109        for i in 0..50 {
1110            for j in 0..50 {
1111                let x = (i as f32 - 25.0) * 0.1;
1112                let y = (j as f32 - 25.0) * 0.1;
1113                let z = 0.0;
1114                points.push(Point3f::new(x, y, z));
1115            }
1116        }
1117        
1118        // Add some outliers
1119        for i in 0..10 {
1120            let x = 25.0 + (i as f32 * 2.0);
1121            let y = 25.0 + (i as f32 * 2.0);
1122            let z = 25.0 + (i as f32 * 2.0);
1123            points.push(Point3f::new(x, y, z));
1124        }
1125        
1126        let cloud = PointCloud::from_points(points);
1127        let original_count = cloud.len();
1128        
1129        let start = Instant::now();
1130        let filtered = gpu_remove_statistical_outliers(&gpu_context, &cloud, 5, 1.0).await.unwrap();
1131        let elapsed = start.elapsed();
1132        
1133        println!("Large dataset GPU outlier removal: {} -> {} points in {:?}", 
1134                original_count, filtered.len(), elapsed);
1135        
1136        assert!(filtered.len() < original_count);
1137        assert!(filtered.len() > 0);
1138        
1139        // Should complete in reasonable time
1140        assert!(elapsed.as_secs() < 10);
1141    }
1142
1143    fn create_test_point_cloud_for_radius_outlier() -> PointCloud<Point3f> {
1144        let mut points = Vec::new();
1145        
1146        // Create a dense cluster
1147        for i in 0..20 {
1148            for j in 0..20 {
1149                let x = (i as f32 - 10.0) * 0.1;
1150                let y = (j as f32 - 10.0) * 0.1;
1151                let z = 0.0;
1152                points.push(Point3f::new(x, y, z));
1153            }
1154        }
1155        
1156        // Add some isolated points (outliers)
1157        for i in 0..5 {
1158            let x = 10.0 + (i as f32 * 2.0);
1159            let y = 10.0 + (i as f32 * 2.0);
1160            let z = 10.0 + (i as f32 * 2.0);
1161            points.push(Point3f::new(x, y, z));
1162        }
1163        
1164        PointCloud::from_points(points)
1165    }
1166
1167    #[tokio::test]
1168    async fn test_gpu_radius_outlier_removal_basic() {
1169        let gpu_context = match try_create_gpu_context().await {
1170            Some(ctx) => ctx,
1171            None => {
1172                println!("Skipping GPU test - no GPU available");
1173                return;
1174            }
1175        };
1176
1177        let cloud = create_test_point_cloud_for_radius_outlier();
1178        let original_count = cloud.len();
1179        
1180        let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 3).await.unwrap();
1181        let filtered_count = filtered.len();
1182        
1183        // Should remove some outliers
1184        assert!(filtered_count < original_count);
1185        assert!(filtered_count > 0);
1186        
1187        println!("GPU radius outlier removal: {} -> {} points", original_count, filtered_count);
1188    }
1189
1190    #[tokio::test]
1191    async fn test_gpu_radius_outlier_removal_empty_cloud() {
1192        let gpu_context = match try_create_gpu_context().await {
1193            Some(ctx) => ctx,
1194            None => {
1195                println!("Skipping GPU test - no GPU available");
1196                return;
1197            }
1198        };
1199
1200        let cloud = PointCloud::<Point3f>::new();
1201        let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 3).await.unwrap();
1202        
1203        assert_eq!(filtered.len(), 0);
1204    }
1205
1206    #[tokio::test]
1207    async fn test_gpu_radius_outlier_removal_single_point() {
1208        let gpu_context = match try_create_gpu_context().await {
1209            Some(ctx) => ctx,
1210            None => {
1211                println!("Skipping GPU test - no GPU available");
1212                return;
1213            }
1214        };
1215
1216        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
1217        let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 1).await.unwrap();
1218        
1219        // Should remove the single point since it has no neighbors
1220        assert_eq!(filtered.len(), 0);
1221    }
1222
1223    #[tokio::test]
1224    async fn test_gpu_radius_outlier_removal_different_parameters() {
1225        let gpu_context = match try_create_gpu_context().await {
1226            Some(ctx) => ctx,
1227            None => {
1228                println!("Skipping GPU test - no GPU available");
1229                return;
1230            }
1231        };
1232
1233        let cloud = create_test_point_cloud_for_radius_outlier();
1234        let original_count = cloud.len();
1235        
1236        // Test different radius values
1237        for radius in [0.2, 0.5, 1.0] {
1238            let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, radius, 3).await.unwrap();
1239            println!("radius={}: {} -> {} points", radius, original_count, filtered.len());
1240            assert!(filtered.len() > 0);
1241        }
1242        
1243        // Test different min_neighbors values
1244        for min_neighbors in [1, 3, 5] {
1245            let filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, min_neighbors).await.unwrap();
1246            println!("min_neighbors={}: {} -> {} points", min_neighbors, original_count, filtered.len());
1247            assert!(filtered.len() > 0);
1248        }
1249    }
1250
1251    fn create_test_point_cloud_for_voxel_grid() -> PointCloud<Point3f> {
1252        let mut points = Vec::new();
1253        
1254        // Create a dense grid of points
1255        for i in 0..10 {
1256            for j in 0..10 {
1257                for k in 0..5 {
1258                    let x = (i as f32 - 5.0) * 0.1;
1259                    let y = (j as f32 - 5.0) * 0.1;
1260                    let z = (k as f32 - 2.5) * 0.1;
1261                    points.push(Point3f::new(x, y, z));
1262                }
1263            }
1264        }
1265        
1266        // Add some duplicate points in the same voxels
1267        for i in 0..5 {
1268            for j in 0..5 {
1269                let x = (i as f32 - 2.5) * 0.1;
1270                let y = (j as f32 - 2.5) * 0.1;
1271                let z = 0.0;
1272                points.push(Point3f::new(x, y, z));
1273                points.push(Point3f::new(x, y, z)); // Duplicate
1274            }
1275        }
1276        
1277        PointCloud::from_points(points)
1278    }
1279
1280    #[tokio::test]
1281    async fn test_gpu_voxel_grid_filter_basic() {
1282        let gpu_context = match try_create_gpu_context().await {
1283            Some(ctx) => ctx,
1284            None => {
1285                println!("Skipping GPU test - no GPU available");
1286                return;
1287            }
1288        };
1289
1290        let cloud = create_test_point_cloud_for_voxel_grid();
1291        let original_count = cloud.len();
1292        
1293        let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1294        let filtered_count = filtered.len();
1295        
1296        // Should reduce the number of points by removing duplicates
1297        assert!(filtered_count < original_count);
1298        assert!(filtered_count > 0);
1299        
1300        println!("GPU voxel grid filter: {} -> {} points", original_count, filtered_count);
1301    }
1302
1303    #[tokio::test]
1304    async fn test_gpu_voxel_grid_filter_empty_cloud() {
1305        let gpu_context = match try_create_gpu_context().await {
1306            Some(ctx) => ctx,
1307            None => {
1308                println!("Skipping GPU test - no GPU available");
1309                return;
1310            }
1311        };
1312
1313        let cloud = PointCloud::<Point3f>::new();
1314        let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1315        
1316        assert_eq!(filtered.len(), 0);
1317    }
1318
1319    #[tokio::test]
1320    async fn test_gpu_voxel_grid_filter_single_point() {
1321        let gpu_context = match try_create_gpu_context().await {
1322            Some(ctx) => ctx,
1323            None => {
1324                println!("Skipping GPU test - no GPU available");
1325                return;
1326            }
1327        };
1328
1329        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
1330        let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1331        
1332        // Should keep the single point
1333        assert_eq!(filtered.len(), 1);
1334    }
1335
1336    #[tokio::test]
1337    async fn test_gpu_voxel_grid_filter_different_voxel_sizes() {
1338        let gpu_context = match try_create_gpu_context().await {
1339            Some(ctx) => ctx,
1340            None => {
1341                println!("Skipping GPU test - no GPU available");
1342                return;
1343            }
1344        };
1345
1346        let cloud = create_test_point_cloud_for_voxel_grid();
1347        let original_count = cloud.len();
1348        
1349        // Test different voxel sizes
1350        for voxel_size in [0.05, 0.1, 0.2] {
1351            let filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, voxel_size).await.unwrap();
1352            println!("voxel_size={}: {} -> {} points", voxel_size, original_count, filtered.len());
1353            assert!(filtered.len() > 0);
1354            assert!(filtered.len() <= original_count);
1355        }
1356    }
1357
1358    #[tokio::test]
1359    async fn test_gpu_voxel_grid_filter_invalid_voxel_size() {
1360        let gpu_context = match try_create_gpu_context().await {
1361            Some(ctx) => ctx,
1362            None => {
1363                println!("Skipping GPU test - no GPU available");
1364                return;
1365            }
1366        };
1367
1368        let cloud = PointCloud::from_points(vec![Point3f::new(0.0, 0.0, 0.0)]);
1369        let result = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.0).await;
1370        assert!(result.is_err());
1371        
1372        let result = gpu_voxel_grid_filter(&gpu_context, &cloud, -1.0).await;
1373        assert!(result.is_err());
1374    }
1375
1376    #[tokio::test]
1377    async fn test_gpu_vs_cpu_performance_comparison() {
1378        let gpu_context = match try_create_gpu_context().await {
1379            Some(ctx) => ctx,
1380            None => {
1381                println!("Skipping GPU vs CPU test - no GPU available");
1382                return;
1383            }
1384        };
1385
1386        let cloud = create_test_point_cloud_for_radius_outlier();
1387        
1388        // GPU radius outlier removal
1389        let gpu_start = Instant::now();
1390        let gpu_filtered = gpu_radius_outlier_removal(&gpu_context, &cloud, 0.5, 3).await.unwrap();
1391        let gpu_time = gpu_start.elapsed();
1392        
1393        // GPU voxel grid filter
1394        let gpu_voxel_start = Instant::now();
1395        let gpu_voxel_filtered = gpu_voxel_grid_filter(&gpu_context, &cloud, 0.1).await.unwrap();
1396        let gpu_voxel_time = gpu_voxel_start.elapsed();
1397        
1398        println!("GPU radius outlier removal: {} -> {} points in {:?}", 
1399                cloud.len(), gpu_filtered.len(), gpu_time);
1400        println!("GPU voxel grid filter: {} -> {} points in {:?}", 
1401                cloud.len(), gpu_voxel_filtered.len(), gpu_voxel_time);
1402        
1403        // Both should complete successfully
1404        assert!(gpu_filtered.len() > 0);
1405        assert!(gpu_voxel_filtered.len() > 0);
1406        
1407        // Should complete in reasonable time
1408        assert!(gpu_time.as_secs() < 10);
1409        assert!(gpu_voxel_time.as_secs() < 10);
1410    }
1411}