use threecrate_core::{PointCloud, Result, Point3f, Vector3f};
use crate::GpuContext;
use nalgebra::{Isometry3, Matrix6, Vector6, UnitQuaternion, Translation3};
const ICP_NEAREST_NEIGHBOR_SHADER: &str = r#"
@group(0) @binding(0) var<storage, read> source_points: array<vec4<f32>>;
@group(0) @binding(1) var<storage, read> target_points: array<vec4<f32>>;
@group(0) @binding(2) var<storage, read_write> correspondences: array<u32>;
@group(0) @binding(3) var<storage, read_write> distances: array<f32>;
@group(0) @binding(4) var<uniform> params: ICPParams;
struct ICPParams {
num_source: u32,
num_target: u32,
max_distance: f32,
}
@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let index = global_id.x;
if (index >= params.num_source) {
return;
}
let source_point = source_points[index].xyz;
var min_distance = params.max_distance;
var best_match = 0u;
// Find nearest neighbor in target
for (var i = 0u; i < params.num_target; i++) {
let target_point = target_points[i].xyz;
let diff = source_point - target_point;
let distance = length(diff);
if (distance < min_distance) {
min_distance = distance;
best_match = i;
}
}
correspondences[index] = best_match;
distances[index] = min_distance;
}
"#;
const ICP_CENTROID_SHADER: &str = r#"
@group(0) @binding(0) var<storage, read> source_points: array<vec4<f32>>;
@group(0) @binding(1) var<storage, read> target_points: array<vec4<f32>>;
@group(0) @binding(2) var<storage, read> correspondences: array<u32>;
@group(0) @binding(3) var<storage, read_write> centroids: array<vec4<f32>>; // [source_centroid, target_centroid]
@group(0) @binding(4) var<uniform> params: CentroidParams;
struct CentroidParams {
num_correspondences: u32,
}
@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
if (global_id.x != 0u) {
return;
}
var source_centroid = vec3<f32>(0.0);
var target_centroid = vec3<f32>(0.0);
for (var i = 0u; i < params.num_correspondences; i++) {
let target_idx = correspondences[i];
source_centroid += source_points[i].xyz;
target_centroid += target_points[target_idx].xyz;
}
let scale = 1.0 / f32(params.num_correspondences);
centroids[0] = vec4<f32>(source_centroid * scale, 0.0);
centroids[1] = vec4<f32>(target_centroid * scale, 0.0);
}
"#;
#[allow(dead_code)]
const ICP_COVARIANCE_SHADER: &str = r#"
@group(0) @binding(0) var<storage, read> source_points: array<vec4<f32>>;
@group(0) @binding(1) var<storage, read> target_points: array<vec4<f32>>;
@group(0) @binding(2) var<storage, read> correspondences: array<u32>;
@group(0) @binding(3) var<storage, read> centroids: array<vec4<f32>>;
@group(0) @binding(4) var<storage, read_write> covariance: array<f32>; // 9 elements for 3x3 matrix
@group(0) @binding(5) var<uniform> params: CovarianceParams;
struct CovarianceParams {
num_correspondences: u32,
}
@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let index = global_id.x;
if (index >= params.num_correspondences) {
return;
}
let source_centroid = centroids[0].xyz;
let target_centroid = centroids[1].xyz;
let target_idx = correspondences[index];
let source_centered = source_points[index].xyz - source_centroid;
let target_centered = target_points[target_idx].xyz - target_centroid;
// Compute outer product contribution
let h00 = source_centered.x * target_centered.x;
let h01 = source_centered.x * target_centered.y;
let h02 = source_centered.x * target_centered.z;
let h10 = source_centered.y * target_centered.x;
let h11 = source_centered.y * target_centered.y;
let h12 = source_centered.y * target_centered.z;
let h20 = source_centered.z * target_centered.x;
let h21 = source_centered.z * target_centered.y;
let h22 = source_centered.z * target_centered.z;
// Atomic add to covariance matrix (approximated with individual element updates)
atomicAdd(&covariance[0], bitcast<i32>(h00));
atomicAdd(&covariance[1], bitcast<i32>(h01));
atomicAdd(&covariance[2], bitcast<i32>(h02));
atomicAdd(&covariance[3], bitcast<i32>(h10));
atomicAdd(&covariance[4], bitcast<i32>(h11));
atomicAdd(&covariance[5], bitcast<i32>(h12));
atomicAdd(&covariance[6], bitcast<i32>(h20));
atomicAdd(&covariance[7], bitcast<i32>(h21));
atomicAdd(&covariance[8], bitcast<i32>(h22));
}
"#;
#[derive(Debug, Clone)]
pub struct BatchICPJob {
pub source: PointCloud<Point3f>,
pub target: PointCloud<Point3f>,
pub max_iterations: usize,
pub convergence_threshold: f32,
pub max_correspondence_distance: f32,
}
#[derive(Debug, Clone)]
pub struct BatchICPResult {
pub transformation: Isometry3<f32>,
pub final_error: f32,
pub iterations: usize,
}
impl GpuContext {
pub async fn batch_icp_align(&self, jobs: &[BatchICPJob]) -> Result<Vec<BatchICPResult>> {
let mut results = Vec::with_capacity(jobs.len());
const BATCH_SIZE: usize = 4;
for batch in jobs.chunks(BATCH_SIZE) {
let batch_results = self.process_icp_batch(batch).await?;
results.extend(batch_results);
}
Ok(results)
}
async fn process_icp_batch(&self, jobs: &[BatchICPJob]) -> Result<Vec<BatchICPResult>> {
let mut results = Vec::with_capacity(jobs.len());
for job in jobs {
let result = self.optimized_icp_align(
&job.source,
&job.target,
job.max_iterations,
job.convergence_threshold,
job.max_correspondence_distance,
).await?;
results.push(result);
}
Ok(results)
}
async fn optimized_icp_align(
&self,
source: &PointCloud<Point3f>,
target: &PointCloud<Point3f>,
max_iterations: usize,
convergence_threshold: f32,
max_correspondence_distance: f32,
) -> Result<BatchICPResult> {
if source.is_empty() || target.is_empty() {
return Err(threecrate_core::Error::InvalidData("Empty point clouds".to_string()));
}
let mut current_transform = Isometry3::identity();
let mut transformed_source = source.clone();
let mut final_error = f32::INFINITY;
let mut iterations_used = 0;
for iteration in 0..max_iterations {
iterations_used = iteration + 1;
let correspondences = self.find_correspondences(
&transformed_source.points,
&target.points,
max_correspondence_distance,
).await?;
if correspondences.is_empty() {
break;
}
let (transform_delta, error) = self.compute_transformation_gpu(
&transformed_source.points,
&target.points,
&correspondences
).await?;
final_error = error;
current_transform = transform_delta * current_transform;
transformed_source = source.clone();
for point in &mut transformed_source.points {
*point = current_transform.transform_point(point);
}
let translation_norm = transform_delta.translation.vector.norm();
let rotation_angle = transform_delta.rotation.angle();
if translation_norm < convergence_threshold && rotation_angle < convergence_threshold {
break;
}
}
Ok(BatchICPResult {
transformation: current_transform,
final_error,
iterations: iterations_used,
})
}
async fn compute_transformation_gpu(
&self,
source_points: &[Point3f],
target_points: &[Point3f],
correspondences: &[(usize, usize, f32)],
) -> Result<(Isometry3<f32>, f32)> {
if correspondences.is_empty() {
return Ok((Isometry3::identity(), 0.0));
}
let source_data: Vec<[f32; 4]> = source_points
.iter()
.map(|p| [p.x, p.y, p.z, 0.0])
.collect();
let target_data: Vec<[f32; 4]> = target_points
.iter()
.map(|p| [p.x, p.y, p.z, 0.0])
.collect();
let correspondence_indices: Vec<u32> = correspondences
.iter()
.map(|(_, target_idx, _)| *target_idx as u32)
.collect();
let source_buffer = self.create_buffer_init("Source Points", &source_data, wgpu::BufferUsages::STORAGE);
let target_buffer = self.create_buffer_init("Target Points", &target_data, wgpu::BufferUsages::STORAGE);
let correspondence_buffer = self.create_buffer_init("Correspondences", &correspondence_indices, wgpu::BufferUsages::STORAGE);
let centroids = self.compute_centroids_gpu(
&source_buffer,
&target_buffer,
&correspondence_buffer,
correspondences.len(),
).await?;
let covariance = self.compute_covariance_cpu(
source_points,
target_points,
correspondences,
¢roids,
)?;
let transformation = self.svd_to_transformation(&covariance, ¢roids)?;
let error = correspondences.iter().map(|(_, _, dist)| dist).sum::<f32>() / correspondences.len() as f32;
Ok((transformation, error))
}
async fn compute_centroids_gpu(
&self,
source_buffer: &wgpu::Buffer,
target_buffer: &wgpu::Buffer,
correspondence_buffer: &wgpu::Buffer,
num_correspondences: usize,
) -> Result<Vec<[f32; 3]>> {
#[repr(C)]
#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
struct CentroidParams {
num_correspondences: u32,
}
let params = CentroidParams {
num_correspondences: num_correspondences as u32,
};
let params_buffer = self.create_buffer_init("Centroid Params", &[params], wgpu::BufferUsages::UNIFORM);
let centroids_buffer = self.create_buffer("Centroids", 2 * std::mem::size_of::<[f32; 3]>() as u64, wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC);
let shader = self.create_shader_module("Centroid Computation", ICP_CENTROID_SHADER);
let bind_group_layout = self.create_bind_group_layout("Centroid Layout", &[
wgpu::BindGroupLayoutEntry { binding: 0, visibility: wgpu::ShaderStages::COMPUTE, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: false, min_binding_size: None }, count: None },
wgpu::BindGroupLayoutEntry { binding: 1, visibility: wgpu::ShaderStages::COMPUTE, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: false, min_binding_size: None }, count: None },
wgpu::BindGroupLayoutEntry { binding: 2, visibility: wgpu::ShaderStages::COMPUTE, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: false, min_binding_size: None }, count: None },
wgpu::BindGroupLayoutEntry { binding: 3, visibility: wgpu::ShaderStages::COMPUTE, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, min_binding_size: None }, count: None },
wgpu::BindGroupLayoutEntry { binding: 4, visibility: wgpu::ShaderStages::COMPUTE, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Uniform, has_dynamic_offset: false, min_binding_size: None }, count: None },
]);
let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: Some("Centroid Pipeline"),
layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("Centroid Layout"),
bind_group_layouts: &[Some(&bind_group_layout)],
immediate_size: 0,
})),
module: &shader,
entry_point: Some("main"),
compilation_options: wgpu::PipelineCompilationOptions::default(),
cache: None,
});
let bind_group = self.create_bind_group("Centroid Bind Group", &bind_group_layout, &[
wgpu::BindGroupEntry { binding: 0, resource: source_buffer.as_entire_binding() },
wgpu::BindGroupEntry { binding: 1, resource: target_buffer.as_entire_binding() },
wgpu::BindGroupEntry { binding: 2, resource: correspondence_buffer.as_entire_binding() },
wgpu::BindGroupEntry { binding: 3, resource: centroids_buffer.as_entire_binding() },
wgpu::BindGroupEntry { binding: 4, resource: params_buffer.as_entire_binding() },
]);
let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("Centroid Computation") });
{
let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: Some("Centroid Pass"), timestamp_writes: None });
compute_pass.set_pipeline(&pipeline);
compute_pass.set_bind_group(0, &bind_group, &[]);
compute_pass.dispatch_workgroups(1, 1, 1);
}
let staging_buffer = self.create_buffer("Centroid Staging", 2 * std::mem::size_of::<[f32; 3]>() as u64, wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ);
encoder.copy_buffer_to_buffer(¢roids_buffer, 0, &staging_buffer, 0, 2 * std::mem::size_of::<[f32; 3]>() as u64);
self.queue.submit(std::iter::once(encoder.finish()));
let buffer_slice = staging_buffer.slice(..);
let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
self.device.poll(wgpu::PollType::Wait {
submission_index: None,
timeout: None,
});
if let Some(Ok(())) = receiver.receive().await {
let data = buffer_slice.get_mapped_range();
let centroids: Vec<[f32; 3]> = bytemuck::cast_slice(&data).to_vec();
drop(data);
staging_buffer.unmap();
Ok(centroids)
} else {
Err(threecrate_core::Error::Gpu("Failed to read centroid results".to_string()))
}
}
fn compute_covariance_cpu(
&self,
source_points: &[Point3f],
target_points: &[Point3f],
correspondences: &[(usize, usize, f32)],
centroids: &[[f32; 3]],
) -> Result<nalgebra::Matrix3<f32>> {
let mut h = nalgebra::Matrix3::zeros();
let sc = nalgebra::Vector3::new(centroids[0][0], centroids[0][1], centroids[0][2]);
let tc = nalgebra::Vector3::new(centroids[1][0], centroids[1][1], centroids[1][2]);
for (si, ti, _dist) in correspondences.iter().copied() {
let s = source_points[si];
let t = target_points[ti];
let sv = s.coords - sc;
let tv = t.coords - tc;
h += sv * tv.transpose();
}
Ok(h)
}
fn svd_to_transformation(&self, covariance: &nalgebra::Matrix3<f32>, centroids: &[[f32; 3]]) -> Result<Isometry3<f32>> {
let source_centroid = nalgebra::Vector3::new(centroids[0][0], centroids[0][1], centroids[0][2]);
let target_centroid = nalgebra::Vector3::new(centroids[1][0], centroids[1][1], centroids[1][2]);
let svd = covariance.svd(true, true);
let u = svd.u.ok_or_else(|| threecrate_core::Error::Algorithm("SVD failed".to_string()))?;
let v_t = svd.v_t.ok_or_else(|| threecrate_core::Error::Algorithm("SVD failed".to_string()))?;
let mut rotation = v_t.transpose() * u.transpose();
if rotation.determinant() < 0.0 {
let mut v_corrected = v_t.transpose();
v_corrected.column_mut(2).scale_mut(-1.0);
rotation = v_corrected * u.transpose();
}
let translation = target_centroid - rotation * source_centroid;
Ok(Isometry3::from_parts(
nalgebra::Translation3::from(translation),
nalgebra::UnitQuaternion::from_matrix(&rotation),
))
}
pub async fn icp_align(
&self,
source: &PointCloud<Point3f>,
target: &PointCloud<Point3f>,
max_iterations: usize,
convergence_threshold: f32,
max_correspondence_distance: f32,
) -> Result<Isometry3<f32>> {
let result = self.optimized_icp_align(
source,
target,
max_iterations,
convergence_threshold,
max_correspondence_distance
).await?;
Ok(result.transformation)
}
async fn find_correspondences(
&self,
source_points: &[Point3f],
target_points: &[Point3f],
max_distance: f32,
) -> Result<Vec<(usize, usize, f32)>> {
let source_data: Vec<[f32; 3]> = source_points
.iter()
.map(|p| [p.x, p.y, p.z])
.collect();
let target_data: Vec<[f32; 3]> = target_points
.iter()
.map(|p| [p.x, p.y, p.z])
.collect();
let source_buffer = self.create_buffer_init(
"Source Points",
&source_data,
wgpu::BufferUsages::STORAGE,
);
let target_buffer = self.create_buffer_init(
"Target Points",
&target_data,
wgpu::BufferUsages::STORAGE,
);
let correspondences_buffer = self.create_buffer(
"Correspondences",
(source_data.len() * std::mem::size_of::<u32>()) as u64,
wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
);
let distances_buffer = self.create_buffer(
"Distances",
(source_data.len() * std::mem::size_of::<f32>()) as u64,
wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
);
#[repr(C)]
#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
struct ICPParams {
num_source: u32,
num_target: u32,
max_distance: f32,
}
let params = ICPParams {
num_source: source_data.len() as u32,
num_target: target_data.len() as u32,
max_distance,
};
let params_buffer = self.create_buffer_init(
"ICP Params",
&[params],
wgpu::BufferUsages::UNIFORM,
);
let shader = self.create_shader_module("ICP Nearest Neighbor", ICP_NEAREST_NEIGHBOR_SHADER);
let bind_group_layout = self.create_bind_group_layout(
"ICP Correspondence",
&[
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 2,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: false },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 3,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: false },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 4,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Uniform,
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
},
],
);
let pipeline = self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: Some("ICP Correspondence"),
layout: Some(&self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("ICP Pipeline Layout"),
bind_group_layouts: &[Some(&bind_group_layout)],
immediate_size: 0,
})),
module: &shader,
entry_point: Some("main"),
compilation_options: wgpu::PipelineCompilationOptions::default(),
cache: None,
});
let bind_group = self.create_bind_group(
"ICP Correspondence",
&bind_group_layout,
&[
wgpu::BindGroupEntry {
binding: 0,
resource: source_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 1,
resource: target_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 2,
resource: correspondences_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 3,
resource: distances_buffer.as_entire_binding(),
},
wgpu::BindGroupEntry {
binding: 4,
resource: params_buffer.as_entire_binding(),
},
],
);
let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("ICP Correspondence"),
});
{
let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
label: Some("ICP Correspondence Pass"),
timestamp_writes: None,
});
compute_pass.set_pipeline(&pipeline);
compute_pass.set_bind_group(0, &bind_group, &[]);
let workgroup_count = (source_data.len() + 63) / 64;
compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
}
let correspondences_staging = self.create_buffer(
"Correspondences Staging",
(source_data.len() * std::mem::size_of::<u32>()) as u64,
wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
);
let distances_staging = self.create_buffer(
"Distances Staging",
(source_data.len() * std::mem::size_of::<f32>()) as u64,
wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
);
encoder.copy_buffer_to_buffer(
&correspondences_buffer,
0,
&correspondences_staging,
0,
(source_data.len() * std::mem::size_of::<u32>()) as u64,
);
encoder.copy_buffer_to_buffer(
&distances_buffer,
0,
&distances_staging,
0,
(source_data.len() * std::mem::size_of::<f32>()) as u64,
);
self.queue.submit(std::iter::once(encoder.finish()));
let corr_slice = correspondences_staging.slice(..);
let dist_slice = distances_staging.slice(..);
let (corr_sender, corr_receiver) = futures_intrusive::channel::shared::oneshot_channel();
let (dist_sender, dist_receiver) = futures_intrusive::channel::shared::oneshot_channel();
corr_slice.map_async(wgpu::MapMode::Read, move |v| corr_sender.send(v).unwrap());
dist_slice.map_async(wgpu::MapMode::Read, move |v| dist_sender.send(v).unwrap());
self.device.poll(wgpu::PollType::Wait {
submission_index: None,
timeout: None,
});
if let (Some(Ok(())), Some(Ok(()))) = (corr_receiver.receive().await, dist_receiver.receive().await) {
let corr_data = corr_slice.get_mapped_range();
let dist_data = dist_slice.get_mapped_range();
let correspondences: Vec<u32> = bytemuck::cast_slice(&corr_data).to_vec();
let distances: Vec<f32> = bytemuck::cast_slice(&dist_data).to_vec();
let result: Vec<(usize, usize, f32)> = correspondences
.into_iter()
.zip(distances.into_iter())
.enumerate()
.filter(|(_, (_, distance))| *distance < max_distance)
.map(|(i, (target_idx, distance))| (i, target_idx as usize, distance))
.collect();
drop(corr_data);
drop(dist_data);
correspondences_staging.unmap();
distances_staging.unmap();
Ok(result)
} else {
Err(threecrate_core::Error::Gpu("Failed to read GPU correspondence results".to_string()))
}
}
}
#[derive(Debug, Clone)]
pub struct GpuPointToPlaneICPResult {
pub transformation: Isometry3<f32>,
pub final_error: f32,
pub iterations: usize,
pub converged: bool,
}
impl GpuContext {
pub async fn icp_point_to_plane_align(
&self,
source: &PointCloud<Point3f>,
target: &PointCloud<Point3f>,
target_normals: &[Vector3f],
max_iterations: usize,
convergence_threshold: f32,
max_correspondence_distance: f32,
) -> Result<GpuPointToPlaneICPResult> {
if source.is_empty() || target.is_empty() {
return Err(threecrate_core::Error::InvalidData("Empty point clouds".to_string()));
}
if target_normals.len() != target.points.len() {
return Err(threecrate_core::Error::InvalidData(
"target_normals length must equal target point count".to_string(),
));
}
let mut current_transform = Isometry3::identity();
let mut final_error = f32::INFINITY;
let mut iterations_used = 0;
let mut converged = false;
for iteration in 0..max_iterations {
iterations_used = iteration + 1;
let mut transformed_source = source.clone();
for p in &mut transformed_source.points {
*p = current_transform.transform_point(p);
}
let correspondences = self
.find_correspondences(
&transformed_source.points,
&target.points,
max_correspondence_distance,
)
.await?;
if correspondences.len() < 6 {
break;
}
let valid_source: Vec<Point3f> =
correspondences.iter().map(|(si, _, _)| transformed_source.points[*si]).collect();
let valid_target: Vec<Point3f> =
correspondences.iter().map(|(_, ti, _)| target.points[*ti]).collect();
let valid_normals: Vec<Vector3f> =
correspondences.iter().map(|(_, ti, _)| target_normals[*ti]).collect();
let delta = Self::solve_point_to_plane_cpu(&valid_source, &valid_target, &valid_normals)?;
current_transform = delta * current_transform;
let mse: f32 = valid_source
.iter()
.zip(valid_target.iter())
.zip(valid_normals.iter())
.map(|((s, d), n)| {
let v = n.dot(&(d.coords - s.coords));
v * v
})
.sum::<f32>()
/ valid_source.len() as f32;
let translation_change = delta.translation.vector.norm();
let rotation_change = delta.rotation.angle();
final_error = mse;
if translation_change < convergence_threshold && rotation_change < convergence_threshold {
converged = true;
break;
}
}
Ok(GpuPointToPlaneICPResult {
transformation: current_transform,
final_error,
iterations: iterations_used,
converged,
})
}
fn solve_point_to_plane_cpu(
source_points: &[Point3f],
target_points: &[Point3f],
normals: &[Vector3f],
) -> Result<Isometry3<f32>> {
let mut ata = Matrix6::<f32>::zeros();
let mut atb = Vector6::<f32>::zeros();
for ((src, tgt), n) in source_points
.iter()
.zip(target_points.iter())
.zip(normals.iter())
{
let c = src.coords.cross(n);
let a_row = Vector6::new(c.x, c.y, c.z, n.x, n.y, n.z);
let b_i = n.dot(&(tgt.coords - src.coords));
ata += a_row * a_row.transpose();
atb += a_row * b_i;
}
let x = if let Some(chol) = ata.cholesky() {
chol.solve(&atb)
} else {
ata.lu()
.solve(&atb)
.ok_or_else(|| threecrate_core::Error::Algorithm(
"Point-to-plane GPU system is ill-conditioned".to_string(),
))?
};
let rot_x = UnitQuaternion::from_axis_angle(&nalgebra::Vector3::x_axis(), x[0]);
let rot_y = UnitQuaternion::from_axis_angle(&nalgebra::Vector3::y_axis(), x[1]);
let rot_z = UnitQuaternion::from_axis_angle(&nalgebra::Vector3::z_axis(), x[2]);
Ok(Isometry3::from_parts(
Translation3::new(x[3], x[4], x[5]),
rot_z * rot_y * rot_x,
))
}
}
pub async fn gpu_icp(
gpu_context: &GpuContext,
source: &PointCloud<Point3f>,
target: &PointCloud<Point3f>,
max_iterations: usize,
convergence_threshold: f32,
max_correspondence_distance: f32,
) -> Result<Isometry3<f32>> {
gpu_context.icp_align(source, target, max_iterations, convergence_threshold, max_correspondence_distance).await
}
pub async fn gpu_batch_icp(
gpu_context: &GpuContext,
jobs: &[BatchICPJob],
) -> Result<Vec<BatchICPResult>> {
gpu_context.batch_icp_align(jobs).await
}
pub async fn gpu_icp_point_to_plane(
gpu_context: &GpuContext,
source: &PointCloud<Point3f>,
target: &PointCloud<Point3f>,
target_normals: &[Vector3f],
max_iterations: usize,
convergence_threshold: f32,
max_correspondence_distance: f32,
) -> Result<GpuPointToPlaneICPResult> {
gpu_context
.icp_point_to_plane_align(
source,
target,
target_normals,
max_iterations,
convergence_threshold,
max_correspondence_distance,
)
.await
}