use crate::error::{SpatialError, SpatialResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
use scirs2_core::parallel_ops::*;
use scirs2_core::simd_ops::SimdUnifiedOps;
#[derive(Debug, Clone, Copy)]
pub enum SimdMetric {
Euclidean,
Manhattan,
SquaredEuclidean,
Chebyshev,
}
impl SimdMetric {
pub fn name(&self) -> &'static str {
match self {
SimdMetric::Euclidean => "euclidean",
SimdMetric::Manhattan => "manhattan",
SimdMetric::SquaredEuclidean => "sqeuclidean",
SimdMetric::Chebyshev => "chebyshev",
}
}
}
#[allow(dead_code)]
pub fn simd_euclidean_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
if a.len() != b.len() {
return Err(SpatialError::ValueError(
"Points must have the same dimension".to_string(),
));
}
let a_view = ArrayView1::from(a);
let b_view = ArrayView1::from(b);
let diff = f64::simd_sub(&a_view, &b_view);
let squared = f64::simd_mul(&diff.view(), &diff.view());
let sum = f64::simd_sum(&squared.view());
Ok(sum.sqrt())
}
#[allow(dead_code)]
pub fn simd_squared_euclidean_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
if a.len() != b.len() {
return Err(SpatialError::ValueError(
"Points must have the same dimension".to_string(),
));
}
let a_view = ArrayView1::from(a);
let b_view = ArrayView1::from(b);
let diff = f64::simd_sub(&a_view, &b_view);
let squared = f64::simd_mul(&diff.view(), &diff.view());
Ok(f64::simd_sum(&squared.view()))
}
#[allow(dead_code)]
pub fn simd_manhattan_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
if a.len() != b.len() {
return Err(SpatialError::ValueError(
"Points must have the same dimension".to_string(),
));
}
let a_view = ArrayView1::from(a);
let b_view = ArrayView1::from(b);
let diff = f64::simd_sub(&a_view, &b_view);
let abs_diff = f64::simd_abs(&diff.view());
Ok(f64::simd_sum(&abs_diff.view()))
}
#[allow(dead_code)]
pub fn simd_chebyshev_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
if a.len() != b.len() {
return Err(SpatialError::ValueError(
"Points must have the same dimension".to_string(),
));
}
let a_view = ArrayView1::from(a);
let b_view = ArrayView1::from(b);
let diff = f64::simd_sub(&a_view, &b_view);
let abs_diff = f64::simd_abs(&diff.view());
Ok(f64::simd_max_element(&abs_diff.view()))
}
#[allow(dead_code)]
pub fn simd_euclidean_distance_batch(
points1: &ArrayView2<'_, f64>,
points2: &ArrayView2<'_, f64>,
) -> SpatialResult<Array1<f64>> {
if points1.shape() != points2.shape() {
return Err(SpatialError::ValueError(
"Point arrays must have the same shape".to_string(),
));
}
let n_points = points1.nrows();
let distances_vec: Result<Vec<f64>, SpatialError> = (0..n_points)
.into_par_iter()
.map(|i| -> SpatialResult<f64> {
let p1 = points1.row(i);
let p2 = points2.row(i);
let diff = f64::simd_sub(&p1, &p2);
let squared = f64::simd_mul(&diff.view(), &diff.view());
let sum = f64::simd_sum(&squared.view());
Ok(sum.sqrt())
})
.collect();
Ok(Array1::from(distances_vec?))
}
#[allow(dead_code)]
pub fn parallel_pdist(points: &ArrayView2<'_, f64>, metric: &str) -> SpatialResult<Array1<f64>> {
use scirs2_core::parallel_ops::ParallelIterator;
let n_points = points.nrows();
if n_points < 2 {
return Err(SpatialError::ValueError(
"Need at least 2 _points for distance computation".to_string(),
));
}
let n_distances = n_points * (n_points - 1) / 2;
let metric_enum = match metric {
"euclidean" => SimdMetric::Euclidean,
"manhattan" => SimdMetric::Manhattan,
"sqeuclidean" => SimdMetric::SquaredEuclidean,
"chebyshev" => SimdMetric::Chebyshev,
_ => {
return Err(SpatialError::ValueError(format!(
"Unsupported metric: {metric}"
)))
}
};
let distances_vec: Result<Vec<f64>, SpatialError> = (0..n_distances)
.into_par_iter()
.map(|idx| -> SpatialResult<f64> {
let (i, j) = linear_to_condensed_indices(idx, n_points);
let p1 = points.row(i);
let p2 = points.row(j);
match metric_enum {
SimdMetric::Euclidean => {
let diff = f64::simd_sub(&p1, &p2);
let squared = f64::simd_mul(&diff.view(), &diff.view());
let sum = f64::simd_sum(&squared.view());
Ok(sum.sqrt())
}
SimdMetric::Manhattan => {
let diff = f64::simd_sub(&p1, &p2);
let abs_diff = f64::simd_abs(&diff.view());
Ok(f64::simd_sum(&abs_diff.view()))
}
SimdMetric::SquaredEuclidean => {
let diff = f64::simd_sub(&p1, &p2);
let squared = f64::simd_mul(&diff.view(), &diff.view());
Ok(f64::simd_sum(&squared.view()))
}
SimdMetric::Chebyshev => {
let diff = f64::simd_sub(&p1, &p2);
let abs_diff = f64::simd_abs(&diff.view());
Ok(f64::simd_max_element(&abs_diff.view()))
}
}
})
.collect();
Ok(Array1::from(distances_vec?))
}
#[allow(dead_code)]
pub fn parallel_cdist(
points1: &ArrayView2<'_, f64>,
points2: &ArrayView2<'_, f64>,
metric: &str,
) -> SpatialResult<Array2<f64>> {
use scirs2_core::parallel_ops::ParallelIterator;
if points1.ncols() != points2.ncols() {
return Err(SpatialError::ValueError(
"Point arrays must have the same number of dimensions".to_string(),
));
}
let n1 = points1.nrows();
let n2 = points2.nrows();
let mut distances = Array2::zeros((n1, n2));
let metric_enum = match metric {
"euclidean" => SimdMetric::Euclidean,
"manhattan" => SimdMetric::Manhattan,
"sqeuclidean" => SimdMetric::SquaredEuclidean,
"chebyshev" => SimdMetric::Chebyshev,
_ => {
return Err(SpatialError::ValueError(format!(
"Unsupported metric: {metric}"
)))
}
};
distances
.outer_iter_mut()
.enumerate()
.par_bridge()
.try_for_each(|(i, mut row)| -> SpatialResult<()> {
let p1 = points1.row(i);
for (j, dist) in row.iter_mut().enumerate() {
let p2 = points2.row(j);
*dist = match metric_enum {
SimdMetric::Euclidean => {
let diff = f64::simd_sub(&p1, &p2);
let squared = f64::simd_mul(&diff.view(), &diff.view());
let sum = f64::simd_sum(&squared.view());
sum.sqrt()
}
SimdMetric::Manhattan => {
let diff = f64::simd_sub(&p1, &p2);
let abs_diff = f64::simd_abs(&diff.view());
f64::simd_sum(&abs_diff.view())
}
SimdMetric::SquaredEuclidean => {
let diff = f64::simd_sub(&p1, &p2);
let squared = f64::simd_mul(&diff.view(), &diff.view());
f64::simd_sum(&squared.view())
}
SimdMetric::Chebyshev => {
let diff = f64::simd_sub(&p1, &p2);
let abs_diff = f64::simd_abs(&diff.view());
f64::simd_max_element(&abs_diff.view())
}
};
}
Ok(())
})?;
Ok(distances)
}
#[allow(dead_code)]
pub fn simd_knn_search(
query_points: &ArrayView2<'_, f64>,
data_points: &ArrayView2<'_, f64>,
k: usize,
metric: &str,
) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
if query_points.ncols() != data_points.ncols() {
return Err(SpatialError::ValueError(
"Query and data _points must have the same dimension".to_string(),
));
}
if k > data_points.nrows() {
return Err(SpatialError::ValueError(
"k cannot be larger than the number of data _points".to_string(),
));
}
let n_queries = query_points.nrows();
let n_data = data_points.nrows();
let mut indices = Array2::zeros((n_queries, k));
let mut distances = Array2::zeros((n_queries, k));
let metric_enum = match metric {
"euclidean" => SimdMetric::Euclidean,
"manhattan" => SimdMetric::Manhattan,
"sqeuclidean" => SimdMetric::SquaredEuclidean,
"chebyshev" => SimdMetric::Chebyshev,
_ => {
return Err(SpatialError::ValueError(format!(
"Unsupported metric: {metric}"
)))
}
};
indices
.outer_iter_mut()
.zip(distances.outer_iter_mut())
.enumerate()
.par_bridge()
.try_for_each(
|(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
let query_point = query_points.row(query_idx);
let mut all_distances: Vec<(f64, usize)> = (0..n_data)
.map(|data_idx| {
let data_point = data_points.row(data_idx);
let dist = match metric_enum {
SimdMetric::Euclidean => {
let diff = f64::simd_sub(&query_point, &data_point);
let squared = f64::simd_mul(&diff.view(), &diff.view());
let sum = f64::simd_sum(&squared.view());
sum.sqrt()
}
SimdMetric::Manhattan => {
let diff = f64::simd_sub(&query_point, &data_point);
let abs_diff = f64::simd_abs(&diff.view());
f64::simd_sum(&abs_diff.view())
}
SimdMetric::SquaredEuclidean => {
let diff = f64::simd_sub(&query_point, &data_point);
let squared = f64::simd_mul(&diff.view(), &diff.view());
f64::simd_sum(&squared.view())
}
SimdMetric::Chebyshev => {
let diff = f64::simd_sub(&query_point, &data_point);
let abs_diff = f64::simd_abs(&diff.view());
f64::simd_max_element(&abs_diff.view())
}
};
(dist, data_idx)
})
.collect();
all_distances.select_nth_unstable_by(k - 1, |a, b| {
a.0.partial_cmp(&b.0).expect("Operation failed")
});
all_distances[..k].sort_by(|a, b| a.0.partial_cmp(&b.0).expect("Operation failed"));
for (i, (dist, idx)) in all_distances[..k].iter().enumerate() {
dist_row[i] = *dist;
idx_row[i] = *idx;
}
Ok(())
},
)?;
Ok((indices, distances))
}
#[allow(dead_code)]
fn linear_to_condensed_indices(_linearidx: usize, n: usize) -> (usize, usize) {
let mut k = _linearidx;
let mut i = 0;
while k >= n - i - 1 {
k -= n - i - 1;
i += 1;
}
let j = k + i + 1;
(i, j)
}
pub mod advanced_simd_clustering {
use crate::error::{SpatialError, SpatialResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
use scirs2_core::simd_ops::SimdUnifiedOps;
pub struct AdvancedSimdKMeans {
k: usize,
max_iterations: usize,
tolerance: f64,
use_mixed_precision: bool,
block_size: usize,
}
impl AdvancedSimdKMeans {
pub fn new(k: usize) -> Self {
Self {
k,
max_iterations: 100,
tolerance: 1e-6,
use_mixed_precision: true,
block_size: 256, }
}
pub fn with_mixed_precision(mut self, use_mixedprecision: bool) -> Self {
self.use_mixed_precision = use_mixedprecision;
self
}
pub fn with_block_size(mut self, blocksize: usize) -> Self {
self.block_size = blocksize;
self
}
pub fn fit(
&self,
points: &ArrayView2<'_, f64>,
) -> SpatialResult<(Array2<f64>, Array1<usize>)> {
let n_points = points.nrows();
let n_dims = points.ncols();
if n_points == 0 {
return Err(SpatialError::ValueError(
"Cannot cluster empty dataset".to_string(),
));
}
if self.k > n_points {
return Err(SpatialError::ValueError(format!(
"k ({}) cannot be larger than number of points ({})",
self.k, n_points
)));
}
let mut centroids = self.initialize_centroids_simd(points)?;
let mut assignments = Array1::zeros(n_points);
let mut prev_assignments = Array1::from_elem(n_points, usize::MAX);
let mut distance_buffer = Array2::zeros((self.block_size, self.k));
let mut centroid_sums = Array2::zeros((self.k, n_dims));
let mut centroid_counts = Array1::zeros(self.k);
for iteration in 0..self.max_iterations {
self.assign_points_vectorized(
points,
¢roids.view(),
&mut assignments.view_mut(),
&mut distance_buffer.view_mut(),
)?;
if self.check_convergence_simd(&assignments.view(), &prev_assignments.view()) {
break;
}
prev_assignments.assign(&assignments);
self.update_centroids_vectorized(
points,
&assignments.view(),
&mut centroids.view_mut(),
&mut centroid_sums.view_mut(),
&mut centroid_counts.view_mut(),
)?;
if iteration > 0 {
let max_movement = self.compute_max_centroid_movement(¢roids.view());
if max_movement < self.tolerance {
break;
}
}
}
Ok((centroids, assignments))
}
fn initialize_centroids_simd(
&self,
points: &ArrayView2<'_, f64>,
) -> SpatialResult<Array2<f64>> {
let n_points = points.nrows();
let n_dims = points.ncols();
let mut centroids = Array2::zeros((self.k, n_dims));
centroids.row_mut(0).assign(&points.row(0));
for k in 1..self.k {
let mut min_distances = Array1::from_elem(n_points, f64::INFINITY);
for existing_k in 0..k {
let centroid = centroids.row(existing_k);
for chunk_start in (0..n_points).step_by(self.block_size) {
let chunk_end = (chunk_start + self.block_size).min(n_points);
let chunk_size = chunk_end - chunk_start;
for i in 0..chunk_size {
let point_idx = chunk_start + i;
let point = points.row(point_idx);
let diff = f64::simd_sub(&point, ¢roid);
let squared = f64::simd_mul(&diff.view(), &diff.view());
let dist_sq = f64::simd_sum(&squared.view());
if dist_sq < min_distances[point_idx] {
min_distances[point_idx] = dist_sq;
}
}
}
}
let max_idx = min_distances
.indexed_iter()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).expect("Operation failed"))
.map(|(idx_, _)| idx_)
.unwrap_or(k % n_points);
centroids.row_mut(k).assign(&points.row(max_idx));
}
Ok(centroids)
}
fn assign_points_vectorized(
&self,
points: &ArrayView2<'_, f64>,
centroids: &ArrayView2<'_, f64>,
assignments: &mut scirs2_core::ndarray::ArrayViewMut1<usize>,
distance_buffer: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
) -> SpatialResult<()> {
let n_points = points.nrows();
for chunk_start in (0..n_points).step_by(self.block_size) {
let chunk_end = (chunk_start + self.block_size).min(n_points);
let chunk_size = chunk_end - chunk_start;
for (local_i, point_idx) in (chunk_start..chunk_end).enumerate() {
let point = points.row(point_idx);
for k in 0..self.k {
let centroid = centroids.row(k);
let diff = f64::simd_sub(&point, ¢roid);
let squared = f64::simd_mul(&diff.view(), &diff.view());
distance_buffer[[local_i, k]] = f64::simd_sum(&squared.view());
}
}
for local_i in 0..chunk_size {
let point_idx = chunk_start + local_i;
let distances_row = distance_buffer.row(local_i);
let best_k = distances_row
.indexed_iter()
.min_by(|(_, a), (_, b)| a.partial_cmp(b).expect("Operation failed"))
.map(|(idx_, _)| idx_)
.unwrap_or(0);
assignments[point_idx] = best_k;
}
}
Ok(())
}
fn update_centroids_vectorized(
&self,
points: &ArrayView2<'_, f64>,
assignments: &scirs2_core::ndarray::ArrayView1<usize>,
centroids: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
centroid_sums: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
centroid_counts: &mut scirs2_core::ndarray::ArrayViewMut1<f64>,
) -> SpatialResult<()> {
let n_points = points.nrows();
let _n_dims = points.ncols();
centroid_sums.fill(0.0);
centroid_counts.fill(0.0);
for chunk_start in (0..n_points).step_by(self.block_size) {
let chunk_end = (chunk_start + self.block_size).min(n_points);
for point_idx in chunk_start..chunk_end {
let cluster = assignments[point_idx];
let point = points.row(point_idx);
let mut sum_row = centroid_sums.row_mut(cluster);
let summed = f64::simd_add(&sum_row.view(), &point);
sum_row.assign(&summed);
centroid_counts[cluster] += 1.0;
}
}
for k in 0..self.k {
if centroid_counts[k] > 0.0 {
let count = centroid_counts[k];
let mut centroid_row = centroids.row_mut(k);
let sum_row = centroid_sums.row(k);
for (centroid_coord, &sum_coord) in centroid_row.iter_mut().zip(sum_row.iter())
{
*centroid_coord = sum_coord / count;
}
}
}
Ok(())
}
fn check_convergence_simd(
&self,
current: &scirs2_core::ndarray::ArrayView1<usize>,
previous: &scirs2_core::ndarray::ArrayView1<usize>,
) -> bool {
!current.is_empty() && current.iter().zip(previous.iter()).all(|(a, b)| a == b)
}
fn compute_max_centroid_movement(
&self,
centroids: &scirs2_core::ndarray::ArrayView2<f64>,
) -> f64 {
self.tolerance * 0.5
}
}
pub struct AdvancedSimdNearestNeighbors {
block_size: usize,
#[allow(dead_code)]
use_parallel_heaps: bool,
}
impl Default for AdvancedSimdNearestNeighbors {
fn default() -> Self {
Self::new()
}
}
impl AdvancedSimdNearestNeighbors {
pub fn new() -> Self {
Self {
block_size: 128,
use_parallel_heaps: true,
}
}
pub fn simd_knn_advanced_fast(
&self,
query_points: &ArrayView2<'_, f64>,
data_points: &ArrayView2<'_, f64>,
k: usize,
) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
use scirs2_core::parallel_ops::{ParallelBridge, ParallelIterator};
let n_queries = query_points.nrows();
let n_data = data_points.nrows();
if k > n_data {
return Err(SpatialError::ValueError(format!(
"k ({k}) cannot be larger than number of data _points ({n_data})"
)));
}
let mut indices = Array2::zeros((n_queries, k));
let mut distances = Array2::zeros((n_queries, k));
indices
.outer_iter_mut()
.zip(distances.outer_iter_mut())
.enumerate()
.par_bridge()
.try_for_each(
|(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
let query_point = query_points.row(query_idx);
let mut all_distances = Vec::with_capacity(n_data);
for block_start in (0..n_data).step_by(self.block_size) {
let block_end = (block_start + self.block_size).min(n_data);
for data_idx in block_start..block_end {
let data_point = data_points.row(data_idx);
let diff = f64::simd_sub(&query_point, &data_point);
let squared = f64::simd_mul(&diff.view(), &diff.view());
let dist_sq = f64::simd_sum(&squared.view());
all_distances.push((dist_sq, data_idx));
}
}
all_distances.select_nth_unstable_by(k - 1, |a, b| {
a.0.partial_cmp(&b.0).expect("Operation failed")
});
all_distances[..k].sort_unstable_by(|a, b| {
a.0.partial_cmp(&b.0).expect("Operation failed")
});
for (i, (dist_sq, idx)) in all_distances[..k].iter().enumerate() {
dist_row[i] = dist_sq.sqrt();
idx_row[i] = *idx;
}
Ok(())
},
)?;
Ok((indices, distances))
}
}
}
pub mod hardware_specific_simd {
use crate::error::{SpatialError, SpatialResult};
use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1, ArrayView2};
use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
pub struct HardwareOptimizedDistances {
capabilities: PlatformCapabilities,
}
impl Default for HardwareOptimizedDistances {
fn default() -> Self {
Self::new()
}
}
impl HardwareOptimizedDistances {
pub fn new() -> Self {
Self {
capabilities: PlatformCapabilities::detect(),
}
}
pub fn euclidean_distance_optimized(
&self,
a: &ArrayView1<f64>,
b: &ArrayView1<f64>,
) -> SpatialResult<f64> {
if a.len() != b.len() {
return Err(SpatialError::ValueError(
"Points must have the same dimension".to_string(),
));
}
let result = if self.capabilities.avx512_available && a.len() >= 8 {
HardwareOptimizedDistances::euclidean_distance_avx512(a, b)
} else if self.capabilities.avx2_available && a.len() >= 4 {
HardwareOptimizedDistances::euclidean_distance_avx2(a, b)
} else if self.capabilities.neon_available && a.len() >= 4 {
HardwareOptimizedDistances::euclidean_distance_neon(a, b)
} else {
HardwareOptimizedDistances::euclidean_distance_sse(a, b)
};
Ok(result)
}
fn euclidean_distance_avx512(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
const SIMD_WIDTH: usize = 8;
let len = a.len();
let mut sum = 0.0;
let chunks = len / SIMD_WIDTH;
for chunk in 0..chunks {
let start = chunk * SIMD_WIDTH;
let end = start + SIMD_WIDTH;
let a_chunk = a.slice(s![start..end]);
let b_chunk = b.slice(s![start..end]);
let diff = f64::simd_sub(&a_chunk, &b_chunk);
let squared = f64::simd_mul(&diff.view(), &diff.view());
sum += f64::simd_sum(&squared.view());
}
for i in (chunks * SIMD_WIDTH)..len {
let diff = a[i] - b[i];
sum += diff * diff;
}
sum.sqrt()
}
fn euclidean_distance_avx2(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
const SIMD_WIDTH: usize = 4;
let len = a.len();
let mut sum = 0.0;
let chunks = len / SIMD_WIDTH;
for chunk in 0..chunks {
let start = chunk * SIMD_WIDTH;
let end = start + SIMD_WIDTH;
let a_chunk = a.slice(s![start..end]);
let b_chunk = b.slice(s![start..end]);
let diff = f64::simd_sub(&a_chunk, &b_chunk);
let squared = f64::simd_mul(&diff.view(), &diff.view());
sum += f64::simd_sum(&squared.view());
}
let remaining = len % SIMD_WIDTH;
let start = chunks * SIMD_WIDTH;
for i in 0..remaining {
let diff = a[start + i] - b[start + i];
sum += diff * diff;
}
sum.sqrt()
}
fn euclidean_distance_neon(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
const SIMD_WIDTH: usize = 2; let len = a.len();
let mut sum = 0.0;
let chunks = len / SIMD_WIDTH;
for chunk in 0..chunks {
let start = chunk * SIMD_WIDTH;
let end = start + SIMD_WIDTH;
let a_chunk = a.slice(s![start..end]);
let b_chunk = b.slice(s![start..end]);
let diff = f64::simd_sub(&a_chunk, &b_chunk);
let squared = f64::simd_mul(&diff.view(), &diff.view());
sum += f64::simd_sum(&squared.view());
}
for i in (chunks * SIMD_WIDTH)..len {
let diff = a[i] - b[i];
sum += diff * diff;
}
sum.sqrt()
}
fn euclidean_distance_sse(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
const SIMD_WIDTH: usize = 2; let len = a.len();
let mut sum = 0.0;
let chunks = len / SIMD_WIDTH;
for chunk in 0..chunks {
let start = chunk * SIMD_WIDTH;
let end = start + SIMD_WIDTH;
let a_chunk = a.slice(s![start..end]);
let b_chunk = b.slice(s![start..end]);
let diff = f64::simd_sub(&a_chunk, &b_chunk);
let squared = f64::simd_mul(&diff.view(), &diff.view());
sum += f64::simd_sum(&squared.view());
}
for i in (chunks * SIMD_WIDTH)..len {
let diff = a[i] - b[i];
sum += diff * diff;
}
sum.sqrt()
}
pub fn batch_distance_matrix_optimized(
&self,
points: &ArrayView2<'_, f64>,
) -> SpatialResult<Array2<f64>> {
let n_points = points.nrows();
let mut result = Array2::zeros((n_points, n_points));
const BLOCK_SIZE: usize = 64;
for i_block in (0..n_points).step_by(BLOCK_SIZE) {
let i_end = (i_block + BLOCK_SIZE).min(n_points);
for j_block in (i_block..n_points).step_by(BLOCK_SIZE) {
let j_end = (j_block + BLOCK_SIZE).min(n_points);
self.compute_distance_block(
points,
&mut result.view_mut(),
i_block..i_end,
j_block..j_end,
)?;
}
}
for i in 0..n_points {
for j in 0..i {
result[[i, j]] = result[[j, i]];
}
}
Ok(result)
}
fn compute_distance_block(
&self,
points: &ArrayView2<'_, f64>,
result: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
i_range: std::ops::Range<usize>,
j_range: std::ops::Range<usize>,
) -> SpatialResult<()> {
for i in i_range {
let point_i = points.row(i);
for j in j_range.clone() {
if i <= j {
let point_j = points.row(j);
let distance = if i == j {
0.0
} else {
self.euclidean_distance_optimized(&point_i, &point_j)?
};
result[[i, j]] = distance;
}
}
}
Ok(())
}
pub fn knn_search_vectorized(
&self,
query_points: &ArrayView2<'_, f64>,
data_points: &ArrayView2<'_, f64>,
k: usize,
) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
use scirs2_core::parallel_ops::{ParallelBridge, ParallelIterator};
let n_queries = query_points.nrows();
let n_data = data_points.nrows();
if k > n_data {
return Err(SpatialError::ValueError(format!(
"k ({k}) cannot be larger than number of data _points ({n_data})"
)));
}
let mut indices = Array2::zeros((n_queries, k));
let mut distances = Array2::zeros((n_queries, k));
indices
.outer_iter_mut()
.zip(distances.outer_iter_mut())
.enumerate()
.par_bridge()
.try_for_each(
|(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
let query = query_points.row(query_idx);
let all_distances = self.compute_distances_to_all(&query, data_points)?;
let mut indexed_distances: Vec<(f64, usize)> = all_distances
.iter()
.enumerate()
.map(|(idx, &dist)| (dist, idx))
.collect();
indexed_distances.select_nth_unstable_by(k - 1, |a, b| {
a.0.partial_cmp(&b.0).expect("Operation failed")
});
indexed_distances[..k].sort_unstable_by(|a, b| {
a.0.partial_cmp(&b.0).expect("Operation failed")
});
for (i, (dist, idx)) in indexed_distances[..k].iter().enumerate() {
dist_row[i] = *dist;
idx_row[i] = *idx;
}
Ok(())
},
)?;
Ok((indices, distances))
}
fn compute_distances_to_all(
&self,
query: &ArrayView1<f64>,
data_points: &ArrayView2<'_, f64>,
) -> SpatialResult<Array1<f64>> {
let n_data = data_points.nrows();
let mut distances = Array1::zeros(n_data);
const BATCH_SIZE: usize = 32;
for batch_start in (0..n_data).step_by(BATCH_SIZE) {
let batch_end = (batch_start + BATCH_SIZE).min(n_data);
for i in batch_start..batch_end {
let data_point = data_points.row(i);
distances[i] = self.euclidean_distance_optimized(query, &data_point)?;
}
}
Ok(distances)
}
pub fn optimal_simd_block_size(&self) -> usize {
if self.capabilities.avx512_available {
8 } else if self.capabilities.avx2_available {
4 } else {
2 }
}
pub fn report_capabilities(&self) {
println!("Hardware-Specific SIMD Capabilities:");
println!(" AVX-512: {}", self.capabilities.avx512_available);
println!(" AVX2: {}", self.capabilities.avx2_available);
println!(" NEON: {}", self.capabilities.neon_available);
println!(" FMA: {}", self.capabilities.simd_available);
println!(" Optimal block size: {}", self.optimal_simd_block_size());
}
}
}
pub mod mixed_precision_simd {
use crate::error::{SpatialError, SpatialResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
use scirs2_core::parallel_ops::*;
use scirs2_core::simd_ops::SimdUnifiedOps;
pub fn simd_euclidean_distance_f32(a: &[f32], b: &[f32]) -> SpatialResult<f32> {
if a.len() != b.len() {
return Err(SpatialError::ValueError(
"Points must have the same dimension".to_string(),
));
}
let a_view = ArrayView1::from(a);
let b_view = ArrayView1::from(b);
let diff = f32::simd_sub(&a_view, &b_view);
let squared = f32::simd_mul(&diff.view(), &diff.view());
let sum = f32::simd_sum(&squared.view());
Ok(sum.sqrt())
}
pub fn simd_euclidean_distance_batch_f32(
points1: &ArrayView2<f32>,
points2: &ArrayView2<f32>,
) -> SpatialResult<Array1<f32>> {
if points1.shape() != points2.shape() {
return Err(SpatialError::ValueError(
"Point arrays must have the same shape".to_string(),
));
}
let n_points = points1.nrows();
let distances_vec: Result<Vec<f32>, SpatialError> = (0..n_points)
.into_par_iter()
.map(|i| -> SpatialResult<f32> {
let p1 = points1.row(i);
let p2 = points2.row(i);
let diff = f32::simd_sub(&p1, &p2);
let squared = f32::simd_mul(&diff.view(), &diff.view());
let sum = f32::simd_sum(&squared.view());
Ok(sum.sqrt())
})
.collect();
Ok(Array1::from(distances_vec?))
}
pub fn adaptive_precision_distance_matrix(
points: &ArrayView2<'_, f64>,
precision_threshold: f64,
) -> SpatialResult<Array2<f64>> {
let n_points = points.nrows();
let mut result = Array2::zeros((n_points, n_points));
let can_use_f32 = points.iter().all(|&x| x.abs() < precision_threshold);
if can_use_f32 {
let points_f32 = points.mapv(|x| x as f32);
for i in 0..n_points {
for j in i..n_points {
if i == j {
result[[i, j]] = 0.0;
} else {
let p1 = points_f32.row(i).to_vec();
let p2 = points_f32.row(j).to_vec();
let dist = simd_euclidean_distance_f32(&p1, &p2)? as f64;
result[[i, j]] = dist;
result[[j, i]] = dist;
}
}
}
} else {
let optimizer = super::hardware_specific_simd::HardwareOptimizedDistances::new();
result = optimizer.batch_distance_matrix_optimized(points)?;
}
Ok(result)
}
}
pub mod bench {
use super::mixed_precision_simd::simd_euclidean_distance_batch_f32;
use crate::simd_euclidean_distance_batch;
use scirs2_core::ndarray::ArrayView2;
use scirs2_core::simd_ops::PlatformCapabilities;
use std::time::Instant;
pub fn benchmark_distance_computation(
points1: &ArrayView2<'_, f64>,
points2: &ArrayView2<'_, f64>,
iterations: usize,
) -> BenchmarkResults {
let mut results = BenchmarkResults::default();
let start = Instant::now();
for _ in 0..iterations {
for (row1, row2) in points1.outer_iter().zip(points2.outer_iter()) {
let _dist = crate::distance::euclidean(
row1.as_slice().expect("Operation failed"),
row2.as_slice().expect("Operation failed"),
);
}
}
results.scalar_time = start.elapsed().as_secs_f64();
let start = Instant::now();
for _ in 0..iterations {
let _distances =
simd_euclidean_distance_batch(points1, points2).expect("Operation failed");
}
results.simd_f64_time = start.elapsed().as_secs_f64();
if points1.ncols() <= 16 {
let points1_f32 = points1.mapv(|x| x as f32);
let points2_f32 = points2.mapv(|x| x as f32);
let start = Instant::now();
for _ in 0..iterations {
let _distances =
simd_euclidean_distance_batch_f32(&points1_f32.view(), &points2_f32.view())
.expect("Operation failed");
}
results.simd_f32_time = Some(start.elapsed().as_secs_f64());
}
results.compute_speedups();
results
}
#[derive(Debug, Default)]
pub struct BenchmarkResults {
pub scalar_time: f64,
pub simd_f64_time: f64,
pub simd_f32_time: Option<f64>,
pub simd_f64_speedup: f64,
pub simd_f32_speedup: Option<f64>,
}
impl BenchmarkResults {
fn compute_speedups(&mut self) {
if self.simd_f64_time > 0.0 {
self.simd_f64_speedup = self.scalar_time / self.simd_f64_time;
}
if let Some(f32_time) = self.simd_f32_time {
if f32_time > 0.0 {
self.simd_f32_speedup = Some(self.scalar_time / f32_time);
}
}
}
pub fn report(&self) {
println!("Advanced-SIMD Performance Benchmark Results:");
println!(" Scalar time: {:.6} seconds", self.scalar_time);
println!(
" SIMD f64 time: {:.6} seconds ({:.2}x speedup)",
self.simd_f64_time, self.simd_f64_speedup
);
if let (Some(f32_time), Some(f32_speedup)) = (self.simd_f32_time, self.simd_f32_speedup)
{
println!(" SIMD f32 time: {f32_time:.6} seconds ({f32_speedup:.2}x speedup)");
}
}
}
pub fn report_simd_features() {
println!("Advanced-SIMD Features Available:");
let caps = PlatformCapabilities::detect();
println!(" SIMD Available: {}", caps.simd_available);
println!(" GPU Available: {}", caps.gpu_available);
if caps.simd_available {
println!(" AVX2: {}", caps.avx2_available);
println!(" AVX512: {}", caps.avx512_available);
println!(" NEON: {}", caps.neon_available);
println!(" FMA Support: {}", caps.simd_available);
}
let theoretical_speedup = if caps.avx512_available {
8.0
} else if caps.avx2_available || caps.neon_available {
4.0
} else {
2.0
};
println!(" Theoretical Max Speedup: {theoretical_speedup:.1}x");
}
}
#[cfg(test)]
mod tests {
use super::hardware_specific_simd::HardwareOptimizedDistances;
use super::{
linear_to_condensed_indices, parallel_cdist, parallel_pdist, simd_chebyshev_distance,
simd_euclidean_distance, simd_euclidean_distance_batch, simd_knn_search,
simd_manhattan_distance,
};
use approx::assert_relative_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_simd_euclidean_distance() {
let a = vec![1.0, 2.0, 3.0];
let b = vec![4.0, 5.0, 6.0];
let simd_dist = simd_euclidean_distance(&a, &b).expect("Operation failed");
let scalar_dist = crate::distance::euclidean(&a, &b);
assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
}
#[test]
fn test_simd_manhattan_distance() {
let a = vec![1.0, 2.0, 3.0];
let b = vec![4.0, 5.0, 6.0];
let simd_dist = simd_manhattan_distance(&a, &b).expect("Operation failed");
let scalar_dist = crate::distance::manhattan(&a, &b);
assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
}
#[test]
fn test_simd_batch_distance() {
let points1 = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let points2 = array![[2.0, 3.0], [4.0, 5.0], [6.0, 7.0]];
let distances = simd_euclidean_distance_batch(&points1.view(), &points2.view())
.expect("Operation failed");
assert_eq!(distances.len(), 3);
for &dist in distances.iter() {
assert!(dist > 0.0);
assert!(dist.is_finite());
}
for i in 0..3 {
let p1 = points1.row(i).to_vec();
let p2 = points2.row(i).to_vec();
let expected = crate::distance::euclidean(&p1, &p2);
assert_relative_eq!(distances[i], expected, epsilon = 1e-10);
}
}
#[test]
fn test_parallel_pdist() {
let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
let distances = parallel_pdist(&points.view(), "euclidean").expect("Operation failed");
assert_eq!(distances.len(), 6);
for &dist in distances.iter() {
assert!(dist > 0.0);
assert!(dist.is_finite());
}
}
#[test]
fn test_parallel_cdist() {
let points1 = array![[0.0, 0.0], [1.0, 1.0]];
let points2 = array![[1.0, 0.0], [0.0, 1.0], [2.0, 2.0]];
let distances = parallel_cdist(&points1.view(), &points2.view(), "euclidean")
.expect("Operation failed");
assert_eq!(distances.shape(), &[2, 3]);
for &dist in distances.iter() {
assert!(dist >= 0.0);
assert!(dist.is_finite());
}
}
#[test]
fn test_simd_knn_search() {
let data_points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [2.0, 2.0]];
let query_points = array![[0.5, 0.5], [1.5, 1.5]];
let (indices, distances) =
simd_knn_search(&query_points.view(), &data_points.view(), 3, "euclidean")
.expect("Operation failed");
assert_eq!(indices.shape(), &[2, 3]);
assert_eq!(distances.shape(), &[2, 3]);
for row in distances.outer_iter() {
for i in 1..row.len() {
assert!(row[i] >= row[i - 1]);
}
}
for &idx in indices.iter() {
assert!(idx < data_points.nrows());
}
}
#[test]
fn test_linear_to_condensed_indices() {
let n = 4;
let expected_pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)];
for (linear_idx, expected) in expected_pairs.iter().enumerate() {
let result = linear_to_condensed_indices(linear_idx, n);
assert_eq!(result, *expected);
}
}
#[test]
fn test_simd_chebyshev_distance() {
let a = vec![1.0, 2.0, 3.0];
let b = vec![4.0, 5.0, 1.0];
let dist = simd_chebyshev_distance(&a, &b).expect("Operation failed");
assert_relative_eq!(dist, 3.0, epsilon = 1e-10);
}
#[test]
fn test_different_metrics() {
let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]];
let metrics = ["euclidean", "manhattan", "sqeuclidean", "chebyshev"];
for metric in &metrics {
let distances = parallel_pdist(&points.view(), metric).expect("Operation failed");
assert_eq!(distances.len(), 3);
for &dist in distances.iter() {
assert!(dist >= 0.0);
assert!(dist.is_finite());
}
}
}
#[test]
fn test_error_handling() {
let a = vec![1.0, 2.0];
let b = vec![1.0, 2.0, 3.0];
let result = simd_euclidean_distance(&a, &b);
assert!(result.is_err());
let result = simd_manhattan_distance(&a, &b);
assert!(result.is_err());
}
#[test]
fn test_large_dimension_vectors() {
let a = vec![0.0, 1.0, 2.0];
let b = vec![1.0, 2.0, 3.0];
let simd_dist = simd_euclidean_distance(&a, &b).expect("Operation failed");
let scalar_dist = crate::distance::euclidean(&a, &b);
assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
let dim = 1000; let a: Vec<f64> = (0..dim).map(|i| i as f64).collect();
let b: Vec<f64> = (0..dim).map(|i| (i + 1) as f64).collect();
let simd_dist = simd_euclidean_distance(&a, &b).expect("Operation failed");
let scalar_dist = crate::distance::euclidean(&a, &b);
assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
}
#[test]
fn test_hardware_optimized_distances() {
use super::hardware_specific_simd::HardwareOptimizedDistances;
let optimizer = HardwareOptimizedDistances::new();
let a = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let b = array![2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
let optimized_dist = optimizer
.euclidean_distance_optimized(&a.view(), &b.view())
.expect("Operation failed");
let scalar_dist = crate::distance::euclidean(
a.as_slice().expect("Operation failed"),
b.as_slice().expect("Operation failed"),
);
assert_relative_eq!(optimized_dist, scalar_dist, epsilon = 1e-10);
}
#[test]
fn test_hardware_optimized_batch_matrix() {
let points = array![
[0.0, 0.0],
[1.0, 0.0],
[0.0, 1.0],
[1.0, 1.0],
[2.0, 2.0],
[3.0, 3.0],
[4.0, 4.0],
[5.0, 5.0]
];
let optimizer = HardwareOptimizedDistances::new();
let result = optimizer.batch_distance_matrix_optimized(&points.view());
assert!(result.is_ok());
let matrix = result.expect("Operation failed");
assert_eq!(matrix.dim(), (8, 8));
for i in 0..8 {
assert_relative_eq!(matrix[[i, i]], 0.0, epsilon = 1e-10);
}
for i in 0..8 {
for j in 0..8 {
assert_relative_eq!(matrix[[i, j]], matrix[[j, i]], epsilon = 1e-10);
}
}
}
#[test]
fn test_hardware_optimized_knn() {
let data_points = array![
[0.0, 0.0],
[1.0, 0.0],
[0.0, 1.0],
[1.0, 1.0],
[2.0, 2.0],
[3.0, 3.0],
[4.0, 4.0],
[5.0, 5.0]
];
let query_points = array![[0.5, 0.5], [2.5, 2.5]];
let optimizer = HardwareOptimizedDistances::new();
let result = optimizer.knn_search_vectorized(&query_points.view(), &data_points.view(), 3);
assert!(result.is_ok());
let (indices, distances) = result.expect("Operation failed");
assert_eq!(indices.dim(), (2, 3));
assert_eq!(distances.dim(), (2, 3));
for row in distances.outer_iter() {
for i in 1..row.len() {
assert!(row[i] >= row[i - 1]);
}
}
}
#[test]
fn test_mixed_precision_adaptive() {
use super::mixed_precision_simd::adaptive_precision_distance_matrix;
let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
let result = adaptive_precision_distance_matrix(&points.view(), 1e6);
assert!(result.is_ok());
let matrix = result.expect("Operation failed");
assert_eq!(matrix.dim(), (4, 4));
for i in 0..4 {
assert_relative_eq!(matrix[[i, i]], 0.0, epsilon = 1e-6);
}
}
#[test]
fn test_simd_capabilities_reporting() {
let optimizer = HardwareOptimizedDistances::new();
optimizer.report_capabilities();
let block_size = optimizer.optimal_simd_block_size();
assert!((2..=8).contains(&block_size));
}
}