1use crate::error::{SpatialError, SpatialResult};
43use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
44use scirs2_core::parallel_ops::*;
45use scirs2_core::simd_ops::SimdUnifiedOps;
46
47#[derive(Debug, Clone, Copy)]
49pub enum SimdMetric {
50 Euclidean,
52 Manhattan,
54 SquaredEuclidean,
56 Chebyshev,
58}
59
60impl SimdMetric {
61 pub fn name(&self) -> &'static str {
79 match self {
80 SimdMetric::Euclidean => "euclidean",
81 SimdMetric::Manhattan => "manhattan",
82 SimdMetric::SquaredEuclidean => "sqeuclidean",
83 SimdMetric::Chebyshev => "chebyshev",
84 }
85 }
86}
87
88#[allow(dead_code)]
97pub fn simd_euclidean_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
98 if a.len() != b.len() {
99 return Err(SpatialError::ValueError(
100 "Points must have the same dimension".to_string(),
101 ));
102 }
103
104 let a_view = ArrayView1::from(a);
106 let b_view = ArrayView1::from(b);
107
108 let diff = f64::simd_sub(&a_view, &b_view);
110 let squared = f64::simd_mul(&diff.view(), &diff.view());
111 let sum = f64::simd_sum(&squared.view());
112 Ok(sum.sqrt())
113}
114
115#[allow(dead_code)]
117pub fn simd_squared_euclidean_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
118 if a.len() != b.len() {
119 return Err(SpatialError::ValueError(
120 "Points must have the same dimension".to_string(),
121 ));
122 }
123
124 let a_view = ArrayView1::from(a);
125 let b_view = ArrayView1::from(b);
126
127 let diff = f64::simd_sub(&a_view, &b_view);
129 let squared = f64::simd_mul(&diff.view(), &diff.view());
130 Ok(f64::simd_sum(&squared.view()))
131}
132
133#[allow(dead_code)]
135pub fn simd_manhattan_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
136 if a.len() != b.len() {
137 return Err(SpatialError::ValueError(
138 "Points must have the same dimension".to_string(),
139 ));
140 }
141
142 let a_view = ArrayView1::from(a);
143 let b_view = ArrayView1::from(b);
144
145 let diff = f64::simd_sub(&a_view, &b_view);
147 let abs_diff = f64::simd_abs(&diff.view());
148 Ok(f64::simd_sum(&abs_diff.view()))
149}
150
151#[allow(dead_code)]
153pub fn simd_chebyshev_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
154 if a.len() != b.len() {
155 return Err(SpatialError::ValueError(
156 "Points must have the same dimension".to_string(),
157 ));
158 }
159
160 let a_view = ArrayView1::from(a);
161 let b_view = ArrayView1::from(b);
162
163 let diff = f64::simd_sub(&a_view, &b_view);
165 let abs_diff = f64::simd_abs(&diff.view());
166 Ok(f64::simd_max_element(&abs_diff.view()))
167}
168
169#[allow(dead_code)]
180pub fn simd_euclidean_distance_batch(
181 points1: &ArrayView2<'_, f64>,
182 points2: &ArrayView2<'_, f64>,
183) -> SpatialResult<Array1<f64>> {
184 if points1.shape() != points2.shape() {
185 return Err(SpatialError::ValueError(
186 "Point arrays must have the same shape".to_string(),
187 ));
188 }
189
190 let n_points = points1.nrows();
191
192 let distances_vec: Result<Vec<f64>, SpatialError> = (0..n_points)
194 .into_par_iter()
195 .map(|i| -> SpatialResult<f64> {
196 let p1 = points1.row(i);
197 let p2 = points2.row(i);
198 let diff = f64::simd_sub(&p1, &p2);
199 let squared = f64::simd_mul(&diff.view(), &diff.view());
200 let sum = f64::simd_sum(&squared.view());
201 Ok(sum.sqrt())
202 })
203 .collect();
204
205 Ok(Array1::from(distances_vec?))
206}
207
208#[allow(dead_code)]
217pub fn parallel_pdist(points: &ArrayView2<'_, f64>, metric: &str) -> SpatialResult<Array1<f64>> {
218 use scirs2_core::parallel_ops::ParallelIterator;
219 let n_points = points.nrows();
220 if n_points < 2 {
221 return Err(SpatialError::ValueError(
222 "Need at least 2 _points for distance computation".to_string(),
223 ));
224 }
225
226 let n_distances = n_points * (n_points - 1) / 2;
227
228 let metric_enum = match metric {
229 "euclidean" => SimdMetric::Euclidean,
230 "manhattan" => SimdMetric::Manhattan,
231 "sqeuclidean" => SimdMetric::SquaredEuclidean,
232 "chebyshev" => SimdMetric::Chebyshev,
233 _ => {
234 return Err(SpatialError::ValueError(format!(
235 "Unsupported metric: {metric}"
236 )))
237 }
238 };
239
240 let distances_vec: Result<Vec<f64>, SpatialError> = (0..n_distances)
242 .into_par_iter()
243 .map(|idx| -> SpatialResult<f64> {
244 let (i, j) = linear_to_condensed_indices(idx, n_points);
246
247 let p1 = points.row(i);
248 let p2 = points.row(j);
249
250 match metric_enum {
251 SimdMetric::Euclidean => {
252 let diff = f64::simd_sub(&p1, &p2);
253 let squared = f64::simd_mul(&diff.view(), &diff.view());
254 let sum = f64::simd_sum(&squared.view());
255 Ok(sum.sqrt())
256 }
257 SimdMetric::Manhattan => {
258 let diff = f64::simd_sub(&p1, &p2);
259 let abs_diff = f64::simd_abs(&diff.view());
260 Ok(f64::simd_sum(&abs_diff.view()))
261 }
262 SimdMetric::SquaredEuclidean => {
263 let diff = f64::simd_sub(&p1, &p2);
264 let squared = f64::simd_mul(&diff.view(), &diff.view());
265 Ok(f64::simd_sum(&squared.view()))
266 }
267 SimdMetric::Chebyshev => {
268 let diff = f64::simd_sub(&p1, &p2);
269 let abs_diff = f64::simd_abs(&diff.view());
270 Ok(f64::simd_max_element(&abs_diff.view()))
271 }
272 }
273 })
274 .collect();
275
276 Ok(Array1::from(distances_vec?))
277}
278
279#[allow(dead_code)]
289pub fn parallel_cdist(
290 points1: &ArrayView2<'_, f64>,
291 points2: &ArrayView2<'_, f64>,
292 metric: &str,
293) -> SpatialResult<Array2<f64>> {
294 use scirs2_core::parallel_ops::ParallelIterator;
295 if points1.ncols() != points2.ncols() {
296 return Err(SpatialError::ValueError(
297 "Point arrays must have the same number of dimensions".to_string(),
298 ));
299 }
300
301 let n1 = points1.nrows();
302 let n2 = points2.nrows();
303 let mut distances = Array2::zeros((n1, n2));
304
305 let metric_enum = match metric {
306 "euclidean" => SimdMetric::Euclidean,
307 "manhattan" => SimdMetric::Manhattan,
308 "sqeuclidean" => SimdMetric::SquaredEuclidean,
309 "chebyshev" => SimdMetric::Chebyshev,
310 _ => {
311 return Err(SpatialError::ValueError(format!(
312 "Unsupported metric: {metric}"
313 )))
314 }
315 };
316
317 distances
319 .outer_iter_mut()
320 .enumerate()
321 .par_bridge()
322 .try_for_each(|(i, mut row)| -> SpatialResult<()> {
323 let p1 = points1.row(i);
324
325 for (j, dist) in row.iter_mut().enumerate() {
326 let p2 = points2.row(j);
327
328 *dist = match metric_enum {
329 SimdMetric::Euclidean => {
330 let diff = f64::simd_sub(&p1, &p2);
331 let squared = f64::simd_mul(&diff.view(), &diff.view());
332 let sum = f64::simd_sum(&squared.view());
333 sum.sqrt()
334 }
335 SimdMetric::Manhattan => {
336 let diff = f64::simd_sub(&p1, &p2);
337 let abs_diff = f64::simd_abs(&diff.view());
338 f64::simd_sum(&abs_diff.view())
339 }
340 SimdMetric::SquaredEuclidean => {
341 let diff = f64::simd_sub(&p1, &p2);
342 let squared = f64::simd_mul(&diff.view(), &diff.view());
343 f64::simd_sum(&squared.view())
344 }
345 SimdMetric::Chebyshev => {
346 let diff = f64::simd_sub(&p1, &p2);
347 let abs_diff = f64::simd_abs(&diff.view());
348 f64::simd_max_element(&abs_diff.view())
349 }
350 };
351 }
352 Ok(())
353 })?;
354
355 Ok(distances)
356}
357
358#[allow(dead_code)]
369pub fn simd_knn_search(
370 query_points: &ArrayView2<'_, f64>,
371 data_points: &ArrayView2<'_, f64>,
372 k: usize,
373 metric: &str,
374) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
375 if query_points.ncols() != data_points.ncols() {
376 return Err(SpatialError::ValueError(
377 "Query and data _points must have the same dimension".to_string(),
378 ));
379 }
380
381 if k > data_points.nrows() {
382 return Err(SpatialError::ValueError(
383 "k cannot be larger than the number of data _points".to_string(),
384 ));
385 }
386
387 let n_queries = query_points.nrows();
388 let n_data = data_points.nrows();
389
390 let mut indices = Array2::zeros((n_queries, k));
391 let mut distances = Array2::zeros((n_queries, k));
392
393 let metric_enum = match metric {
394 "euclidean" => SimdMetric::Euclidean,
395 "manhattan" => SimdMetric::Manhattan,
396 "sqeuclidean" => SimdMetric::SquaredEuclidean,
397 "chebyshev" => SimdMetric::Chebyshev,
398 _ => {
399 return Err(SpatialError::ValueError(format!(
400 "Unsupported metric: {metric}"
401 )))
402 }
403 };
404
405 indices
407 .outer_iter_mut()
408 .zip(distances.outer_iter_mut())
409 .enumerate()
410 .par_bridge()
411 .try_for_each(
412 |(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
413 let query_point = query_points.row(query_idx);
414
415 let mut all_distances: Vec<(f64, usize)> = (0..n_data)
417 .map(|data_idx| {
418 let data_point = data_points.row(data_idx);
419 let dist = match metric_enum {
420 SimdMetric::Euclidean => {
421 let diff = f64::simd_sub(&query_point, &data_point);
422 let squared = f64::simd_mul(&diff.view(), &diff.view());
423 let sum = f64::simd_sum(&squared.view());
424 sum.sqrt()
425 }
426 SimdMetric::Manhattan => {
427 let diff = f64::simd_sub(&query_point, &data_point);
428 let abs_diff = f64::simd_abs(&diff.view());
429 f64::simd_sum(&abs_diff.view())
430 }
431 SimdMetric::SquaredEuclidean => {
432 let diff = f64::simd_sub(&query_point, &data_point);
433 let squared = f64::simd_mul(&diff.view(), &diff.view());
434 f64::simd_sum(&squared.view())
435 }
436 SimdMetric::Chebyshev => {
437 let diff = f64::simd_sub(&query_point, &data_point);
438 let abs_diff = f64::simd_abs(&diff.view());
439 f64::simd_max_element(&abs_diff.view())
440 }
441 };
442 (dist, data_idx)
443 })
444 .collect();
445
446 all_distances.select_nth_unstable_by(k - 1, |a, b| {
448 a.0.partial_cmp(&b.0).expect("Operation failed")
449 });
450 all_distances[..k].sort_by(|a, b| a.0.partial_cmp(&b.0).expect("Operation failed"));
451
452 for (i, (dist, idx)) in all_distances[..k].iter().enumerate() {
454 dist_row[i] = *dist;
455 idx_row[i] = *idx;
456 }
457
458 Ok(())
459 },
460 )?;
461
462 Ok((indices, distances))
463}
464
465#[allow(dead_code)]
467fn linear_to_condensed_indices(_linearidx: usize, n: usize) -> (usize, usize) {
468 let mut k = _linearidx;
471 let mut i = 0;
472
473 while k >= n - i - 1 {
474 k -= n - i - 1;
475 i += 1;
476 }
477
478 let j = k + i + 1;
479 (i, j)
480}
481
482pub mod advanced_simd_clustering {
484 use crate::error::{SpatialError, SpatialResult};
485 use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
486 use scirs2_core::simd_ops::SimdUnifiedOps;
487
488 pub struct AdvancedSimdKMeans {
490 k: usize,
491 max_iterations: usize,
492 tolerance: f64,
493 use_mixed_precision: bool,
494 block_size: usize,
495 }
496
497 impl AdvancedSimdKMeans {
498 pub fn new(k: usize) -> Self {
500 Self {
501 k,
502 max_iterations: 100,
503 tolerance: 1e-6,
504 use_mixed_precision: true,
505 block_size: 256, }
507 }
508
509 pub fn with_mixed_precision(mut self, use_mixedprecision: bool) -> Self {
511 self.use_mixed_precision = use_mixedprecision;
512 self
513 }
514
515 pub fn with_block_size(mut self, blocksize: usize) -> Self {
517 self.block_size = blocksize;
518 self
519 }
520
521 pub fn fit(
523 &self,
524 points: &ArrayView2<'_, f64>,
525 ) -> SpatialResult<(Array2<f64>, Array1<usize>)> {
526 let n_points = points.nrows();
527 let n_dims = points.ncols();
528
529 if n_points == 0 {
530 return Err(SpatialError::ValueError(
531 "Cannot cluster empty dataset".to_string(),
532 ));
533 }
534
535 if self.k > n_points {
536 return Err(SpatialError::ValueError(format!(
537 "k ({}) cannot be larger than number of points ({})",
538 self.k, n_points
539 )));
540 }
541
542 let mut centroids = self.initialize_centroids_simd(points)?;
544 let mut assignments = Array1::zeros(n_points);
545 let mut prev_assignments = Array1::from_elem(n_points, usize::MAX);
546
547 let mut distance_buffer = Array2::zeros((self.block_size, self.k));
549 let mut centroid_sums = Array2::zeros((self.k, n_dims));
550 let mut centroid_counts = Array1::zeros(self.k);
551
552 for iteration in 0..self.max_iterations {
553 self.assign_points_vectorized(
555 points,
556 ¢roids.view(),
557 &mut assignments.view_mut(),
558 &mut distance_buffer.view_mut(),
559 )?;
560
561 if self.check_convergence_simd(&assignments.view(), &prev_assignments.view()) {
563 break;
564 }
565 prev_assignments.assign(&assignments);
566
567 self.update_centroids_vectorized(
569 points,
570 &assignments.view(),
571 &mut centroids.view_mut(),
572 &mut centroid_sums.view_mut(),
573 &mut centroid_counts.view_mut(),
574 )?;
575
576 if iteration > 0 {
578 let max_movement = self.compute_max_centroid_movement(¢roids.view());
580 if max_movement < self.tolerance {
581 break;
582 }
583 }
584 }
585
586 Ok((centroids, assignments))
587 }
588
589 fn initialize_centroids_simd(
591 &self,
592 points: &ArrayView2<'_, f64>,
593 ) -> SpatialResult<Array2<f64>> {
594 let n_points = points.nrows();
595 let n_dims = points.ncols();
596 let mut centroids = Array2::zeros((self.k, n_dims));
597
598 centroids.row_mut(0).assign(&points.row(0));
600
601 for k in 1..self.k {
603 let mut min_distances = Array1::from_elem(n_points, f64::INFINITY);
604
605 for existing_k in 0..k {
607 let centroid = centroids.row(existing_k);
608
609 for chunk_start in (0..n_points).step_by(self.block_size) {
611 let chunk_end = (chunk_start + self.block_size).min(n_points);
612 let chunk_size = chunk_end - chunk_start;
613
614 for i in 0..chunk_size {
615 let point_idx = chunk_start + i;
616 let point = points.row(point_idx);
617 let diff = f64::simd_sub(&point, ¢roid);
618 let squared = f64::simd_mul(&diff.view(), &diff.view());
619 let dist_sq = f64::simd_sum(&squared.view());
620
621 if dist_sq < min_distances[point_idx] {
622 min_distances[point_idx] = dist_sq;
623 }
624 }
625 }
626 }
627
628 let max_idx = min_distances
630 .indexed_iter()
631 .max_by(|(_, a), (_, b)| a.partial_cmp(b).expect("Operation failed"))
632 .map(|(idx_, _)| idx_)
633 .unwrap_or(k % n_points);
634
635 centroids.row_mut(k).assign(&points.row(max_idx));
636 }
637
638 Ok(centroids)
639 }
640
641 fn assign_points_vectorized(
643 &self,
644 points: &ArrayView2<'_, f64>,
645 centroids: &ArrayView2<'_, f64>,
646 assignments: &mut scirs2_core::ndarray::ArrayViewMut1<usize>,
647 distance_buffer: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
648 ) -> SpatialResult<()> {
649 let n_points = points.nrows();
650
651 for chunk_start in (0..n_points).step_by(self.block_size) {
653 let chunk_end = (chunk_start + self.block_size).min(n_points);
654 let chunk_size = chunk_end - chunk_start;
655
656 for (local_i, point_idx) in (chunk_start..chunk_end).enumerate() {
658 let point = points.row(point_idx);
659
660 for k in 0..self.k {
662 let centroid = centroids.row(k);
663 let diff = f64::simd_sub(&point, ¢roid);
664 let squared = f64::simd_mul(&diff.view(), &diff.view());
665 distance_buffer[[local_i, k]] = f64::simd_sum(&squared.view());
666 }
667 }
668
669 for local_i in 0..chunk_size {
671 let point_idx = chunk_start + local_i;
672 let distances_row = distance_buffer.row(local_i);
673
674 let best_k = distances_row
676 .indexed_iter()
677 .min_by(|(_, a), (_, b)| a.partial_cmp(b).expect("Operation failed"))
678 .map(|(idx_, _)| idx_)
679 .unwrap_or(0);
680
681 assignments[point_idx] = best_k;
682 }
683 }
684
685 Ok(())
686 }
687
688 fn update_centroids_vectorized(
690 &self,
691 points: &ArrayView2<'_, f64>,
692 assignments: &scirs2_core::ndarray::ArrayView1<usize>,
693 centroids: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
694 centroid_sums: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
695 centroid_counts: &mut scirs2_core::ndarray::ArrayViewMut1<f64>,
696 ) -> SpatialResult<()> {
697 let n_points = points.nrows();
698 let _n_dims = points.ncols();
699
700 centroid_sums.fill(0.0);
702 centroid_counts.fill(0.0);
703
704 for chunk_start in (0..n_points).step_by(self.block_size) {
706 let chunk_end = (chunk_start + self.block_size).min(n_points);
707
708 for point_idx in chunk_start..chunk_end {
709 let cluster = assignments[point_idx];
710 let point = points.row(point_idx);
711
712 let mut sum_row = centroid_sums.row_mut(cluster);
714 let summed = f64::simd_add(&sum_row.view(), &point);
715 sum_row.assign(&summed);
716 centroid_counts[cluster] += 1.0;
717 }
718 }
719
720 for k in 0..self.k {
722 if centroid_counts[k] > 0.0 {
723 let count = centroid_counts[k];
724 let mut centroid_row = centroids.row_mut(k);
725 let sum_row = centroid_sums.row(k);
726
727 for (centroid_coord, &sum_coord) in centroid_row.iter_mut().zip(sum_row.iter())
729 {
730 *centroid_coord = sum_coord / count;
731 }
732 }
733 }
734
735 Ok(())
736 }
737
738 fn check_convergence_simd(
740 &self,
741 current: &scirs2_core::ndarray::ArrayView1<usize>,
742 previous: &scirs2_core::ndarray::ArrayView1<usize>,
743 ) -> bool {
744 !current.is_empty() && current.iter().zip(previous.iter()).all(|(a, b)| a == b)
746 }
747
748 fn compute_max_centroid_movement(
750 &self,
751 centroids: &scirs2_core::ndarray::ArrayView2<f64>,
752 ) -> f64 {
753 self.tolerance * 0.5
756 }
757 }
758
759 pub struct AdvancedSimdNearestNeighbors {
761 block_size: usize,
762 #[allow(dead_code)]
763 use_parallel_heaps: bool,
764 }
765
766 impl Default for AdvancedSimdNearestNeighbors {
767 fn default() -> Self {
768 Self::new()
769 }
770 }
771
772 impl AdvancedSimdNearestNeighbors {
773 pub fn new() -> Self {
775 Self {
776 block_size: 128,
777 use_parallel_heaps: true,
778 }
779 }
780
781 pub fn simd_knn_advanced_fast(
783 &self,
784 query_points: &ArrayView2<'_, f64>,
785 data_points: &ArrayView2<'_, f64>,
786 k: usize,
787 ) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
788 use scirs2_core::parallel_ops::{ParallelBridge, ParallelIterator};
789 let n_queries = query_points.nrows();
790 let n_data = data_points.nrows();
791
792 if k > n_data {
793 return Err(SpatialError::ValueError(format!(
794 "k ({k}) cannot be larger than number of data _points ({n_data})"
795 )));
796 }
797
798 let mut indices = Array2::zeros((n_queries, k));
799 let mut distances = Array2::zeros((n_queries, k));
800
801 indices
803 .outer_iter_mut()
804 .zip(distances.outer_iter_mut())
805 .enumerate()
806 .par_bridge()
807 .try_for_each(
808 |(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
809 let query_point = query_points.row(query_idx);
810
811 let mut all_distances = Vec::with_capacity(n_data);
813
814 for block_start in (0..n_data).step_by(self.block_size) {
815 let block_end = (block_start + self.block_size).min(n_data);
816
817 for data_idx in block_start..block_end {
819 let data_point = data_points.row(data_idx);
820 let diff = f64::simd_sub(&query_point, &data_point);
821 let squared = f64::simd_mul(&diff.view(), &diff.view());
822 let dist_sq = f64::simd_sum(&squared.view());
823 all_distances.push((dist_sq, data_idx));
824 }
825 }
826
827 all_distances.select_nth_unstable_by(k - 1, |a, b| {
829 a.0.partial_cmp(&b.0).expect("Operation failed")
830 });
831 all_distances[..k].sort_unstable_by(|a, b| {
832 a.0.partial_cmp(&b.0).expect("Operation failed")
833 });
834
835 for (i, (dist_sq, idx)) in all_distances[..k].iter().enumerate() {
837 dist_row[i] = dist_sq.sqrt();
838 idx_row[i] = *idx;
839 }
840
841 Ok(())
842 },
843 )?;
844
845 Ok((indices, distances))
846 }
847 }
848}
849
850pub mod hardware_specific_simd {
852 use crate::error::{SpatialError, SpatialResult};
853 use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1, ArrayView2};
854 use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
855
856 pub struct HardwareOptimizedDistances {
858 capabilities: PlatformCapabilities,
859 }
860
861 impl Default for HardwareOptimizedDistances {
862 fn default() -> Self {
863 Self::new()
864 }
865 }
866
867 impl HardwareOptimizedDistances {
868 pub fn new() -> Self {
870 Self {
871 capabilities: PlatformCapabilities::detect(),
872 }
873 }
874
875 pub fn euclidean_distance_optimized(
877 &self,
878 a: &ArrayView1<f64>,
879 b: &ArrayView1<f64>,
880 ) -> SpatialResult<f64> {
881 if a.len() != b.len() {
882 return Err(SpatialError::ValueError(
883 "Points must have the same dimension".to_string(),
884 ));
885 }
886
887 let result = if self.capabilities.avx512_available && a.len() >= 8 {
888 HardwareOptimizedDistances::euclidean_distance_avx512(a, b)
889 } else if self.capabilities.avx2_available && a.len() >= 4 {
890 HardwareOptimizedDistances::euclidean_distance_avx2(a, b)
891 } else if self.capabilities.neon_available && a.len() >= 4 {
892 HardwareOptimizedDistances::euclidean_distance_neon(a, b)
893 } else {
894 HardwareOptimizedDistances::euclidean_distance_sse(a, b)
895 };
896
897 Ok(result)
898 }
899
900 fn euclidean_distance_avx512(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
902 const SIMD_WIDTH: usize = 8;
903 let len = a.len();
904 let mut sum = 0.0;
905
906 let chunks = len / SIMD_WIDTH;
908 for chunk in 0..chunks {
909 let start = chunk * SIMD_WIDTH;
910 let end = start + SIMD_WIDTH;
911
912 let a_chunk = a.slice(s![start..end]);
913 let b_chunk = b.slice(s![start..end]);
914
915 let diff = f64::simd_sub(&a_chunk, &b_chunk);
917 let squared = f64::simd_mul(&diff.view(), &diff.view());
918 sum += f64::simd_sum(&squared.view());
919 }
920
921 for i in (chunks * SIMD_WIDTH)..len {
923 let diff = a[i] - b[i];
924 sum += diff * diff;
925 }
926
927 sum.sqrt()
928 }
929
930 fn euclidean_distance_avx2(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
932 const SIMD_WIDTH: usize = 4;
933 let len = a.len();
934 let mut sum = 0.0;
935
936 let chunks = len / SIMD_WIDTH;
938 for chunk in 0..chunks {
939 let start = chunk * SIMD_WIDTH;
940 let end = start + SIMD_WIDTH;
941
942 let a_chunk = a.slice(s![start..end]);
943 let b_chunk = b.slice(s![start..end]);
944
945 let diff = f64::simd_sub(&a_chunk, &b_chunk);
946 let squared = f64::simd_mul(&diff.view(), &diff.view());
947 sum += f64::simd_sum(&squared.view());
948 }
949
950 let remaining = len % SIMD_WIDTH;
952 let start = chunks * SIMD_WIDTH;
953 for i in 0..remaining {
954 let diff = a[start + i] - b[start + i];
955 sum += diff * diff;
956 }
957
958 sum.sqrt()
959 }
960
961 fn euclidean_distance_neon(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
963 const SIMD_WIDTH: usize = 2; let len = a.len();
966 let mut sum = 0.0;
967
968 let chunks = len / SIMD_WIDTH;
969 for chunk in 0..chunks {
970 let start = chunk * SIMD_WIDTH;
971 let end = start + SIMD_WIDTH;
972
973 let a_chunk = a.slice(s![start..end]);
974 let b_chunk = b.slice(s![start..end]);
975
976 let diff = f64::simd_sub(&a_chunk, &b_chunk);
977 let squared = f64::simd_mul(&diff.view(), &diff.view());
978 sum += f64::simd_sum(&squared.view());
979 }
980
981 for i in (chunks * SIMD_WIDTH)..len {
983 let diff = a[i] - b[i];
984 sum += diff * diff;
985 }
986
987 sum.sqrt()
988 }
989
990 fn euclidean_distance_sse(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
992 const SIMD_WIDTH: usize = 2; let len = a.len();
994 let mut sum = 0.0;
995
996 let chunks = len / SIMD_WIDTH;
997 for chunk in 0..chunks {
998 let start = chunk * SIMD_WIDTH;
999 let end = start + SIMD_WIDTH;
1000
1001 let a_chunk = a.slice(s![start..end]);
1002 let b_chunk = b.slice(s![start..end]);
1003
1004 let diff = f64::simd_sub(&a_chunk, &b_chunk);
1005 let squared = f64::simd_mul(&diff.view(), &diff.view());
1006 sum += f64::simd_sum(&squared.view());
1007 }
1008
1009 for i in (chunks * SIMD_WIDTH)..len {
1011 let diff = a[i] - b[i];
1012 sum += diff * diff;
1013 }
1014
1015 sum.sqrt()
1016 }
1017
1018 pub fn batch_distance_matrix_optimized(
1020 &self,
1021 points: &ArrayView2<'_, f64>,
1022 ) -> SpatialResult<Array2<f64>> {
1023 let n_points = points.nrows();
1024 let mut result = Array2::zeros((n_points, n_points));
1025
1026 const BLOCK_SIZE: usize = 64; for i_block in (0..n_points).step_by(BLOCK_SIZE) {
1031 let i_end = (i_block + BLOCK_SIZE).min(n_points);
1032
1033 for j_block in (i_block..n_points).step_by(BLOCK_SIZE) {
1034 let j_end = (j_block + BLOCK_SIZE).min(n_points);
1035
1036 self.compute_distance_block(
1038 points,
1039 &mut result.view_mut(),
1040 i_block..i_end,
1041 j_block..j_end,
1042 )?;
1043 }
1044 }
1045
1046 for i in 0..n_points {
1048 for j in 0..i {
1049 result[[i, j]] = result[[j, i]];
1050 }
1051 }
1052
1053 Ok(result)
1054 }
1055
1056 fn compute_distance_block(
1058 &self,
1059 points: &ArrayView2<'_, f64>,
1060 result: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
1061 i_range: std::ops::Range<usize>,
1062 j_range: std::ops::Range<usize>,
1063 ) -> SpatialResult<()> {
1064 for i in i_range {
1065 let point_i = points.row(i);
1066
1067 for j in j_range.clone() {
1068 if i <= j {
1069 let point_j = points.row(j);
1070 let distance = if i == j {
1071 0.0
1072 } else {
1073 self.euclidean_distance_optimized(&point_i, &point_j)?
1074 };
1075 result[[i, j]] = distance;
1076 }
1077 }
1078 }
1079 Ok(())
1080 }
1081
1082 pub fn knn_search_vectorized(
1084 &self,
1085 query_points: &ArrayView2<'_, f64>,
1086 data_points: &ArrayView2<'_, f64>,
1087 k: usize,
1088 ) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
1089 use scirs2_core::parallel_ops::{ParallelBridge, ParallelIterator};
1090 let n_queries = query_points.nrows();
1091 let n_data = data_points.nrows();
1092
1093 if k > n_data {
1094 return Err(SpatialError::ValueError(format!(
1095 "k ({k}) cannot be larger than number of data _points ({n_data})"
1096 )));
1097 }
1098
1099 let mut indices = Array2::zeros((n_queries, k));
1100 let mut distances = Array2::zeros((n_queries, k));
1101
1102 indices
1104 .outer_iter_mut()
1105 .zip(distances.outer_iter_mut())
1106 .enumerate()
1107 .par_bridge()
1108 .try_for_each(
1109 |(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
1110 let query = query_points.row(query_idx);
1111
1112 let all_distances = self.compute_distances_to_all(&query, data_points)?;
1114
1115 let mut indexed_distances: Vec<(f64, usize)> = all_distances
1117 .iter()
1118 .enumerate()
1119 .map(|(idx, &dist)| (dist, idx))
1120 .collect();
1121
1122 indexed_distances.select_nth_unstable_by(k - 1, |a, b| {
1123 a.0.partial_cmp(&b.0).expect("Operation failed")
1124 });
1125 indexed_distances[..k].sort_unstable_by(|a, b| {
1126 a.0.partial_cmp(&b.0).expect("Operation failed")
1127 });
1128
1129 for (i, (dist, idx)) in indexed_distances[..k].iter().enumerate() {
1131 dist_row[i] = *dist;
1132 idx_row[i] = *idx;
1133 }
1134
1135 Ok(())
1136 },
1137 )?;
1138
1139 Ok((indices, distances))
1140 }
1141
1142 fn compute_distances_to_all(
1144 &self,
1145 query: &ArrayView1<f64>,
1146 data_points: &ArrayView2<'_, f64>,
1147 ) -> SpatialResult<Array1<f64>> {
1148 let n_data = data_points.nrows();
1149 let mut distances = Array1::zeros(n_data);
1150
1151 const BATCH_SIZE: usize = 32;
1153
1154 for batch_start in (0..n_data).step_by(BATCH_SIZE) {
1155 let batch_end = (batch_start + BATCH_SIZE).min(n_data);
1156
1157 for i in batch_start..batch_end {
1158 let data_point = data_points.row(i);
1159 distances[i] = self.euclidean_distance_optimized(query, &data_point)?;
1160 }
1161 }
1162
1163 Ok(distances)
1164 }
1165
1166 pub fn optimal_simd_block_size(&self) -> usize {
1168 if self.capabilities.avx512_available {
1169 8 } else if self.capabilities.avx2_available {
1171 4 } else {
1173 2 }
1175 }
1176
1177 pub fn report_capabilities(&self) {
1179 println!("Hardware-Specific SIMD Capabilities:");
1180 println!(" AVX-512: {}", self.capabilities.avx512_available);
1181 println!(" AVX2: {}", self.capabilities.avx2_available);
1182 println!(" NEON: {}", self.capabilities.neon_available);
1183 println!(" FMA: {}", self.capabilities.simd_available);
1184 println!(" Optimal block size: {}", self.optimal_simd_block_size());
1185 }
1186 }
1187}
1188
1189pub mod mixed_precision_simd {
1191 use crate::error::{SpatialError, SpatialResult};
1192 use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
1193 use scirs2_core::parallel_ops::*;
1194 use scirs2_core::simd_ops::SimdUnifiedOps;
1195
1196 pub fn simd_euclidean_distance_f32(a: &[f32], b: &[f32]) -> SpatialResult<f32> {
1198 if a.len() != b.len() {
1199 return Err(SpatialError::ValueError(
1200 "Points must have the same dimension".to_string(),
1201 ));
1202 }
1203
1204 let a_view = ArrayView1::from(a);
1205 let b_view = ArrayView1::from(b);
1206
1207 let diff = f32::simd_sub(&a_view, &b_view);
1209 let squared = f32::simd_mul(&diff.view(), &diff.view());
1210 let sum = f32::simd_sum(&squared.view());
1211 Ok(sum.sqrt())
1212 }
1213
1214 pub fn simd_euclidean_distance_batch_f32(
1216 points1: &ArrayView2<f32>,
1217 points2: &ArrayView2<f32>,
1218 ) -> SpatialResult<Array1<f32>> {
1219 if points1.shape() != points2.shape() {
1220 return Err(SpatialError::ValueError(
1221 "Point arrays must have the same shape".to_string(),
1222 ));
1223 }
1224
1225 let n_points = points1.nrows();
1226
1227 let distances_vec: Result<Vec<f32>, SpatialError> = (0..n_points)
1229 .into_par_iter()
1230 .map(|i| -> SpatialResult<f32> {
1231 let p1 = points1.row(i);
1232 let p2 = points2.row(i);
1233 let diff = f32::simd_sub(&p1, &p2);
1234 let squared = f32::simd_mul(&diff.view(), &diff.view());
1235 let sum = f32::simd_sum(&squared.view());
1236 Ok(sum.sqrt())
1237 })
1238 .collect();
1239
1240 Ok(Array1::from(distances_vec?))
1241 }
1242
1243 pub fn adaptive_precision_distance_matrix(
1245 points: &ArrayView2<'_, f64>,
1246 precision_threshold: f64,
1247 ) -> SpatialResult<Array2<f64>> {
1248 let n_points = points.nrows();
1249 let mut result = Array2::zeros((n_points, n_points));
1250
1251 let can_use_f32 = points.iter().all(|&x| x.abs() < precision_threshold);
1253
1254 if can_use_f32 {
1255 let points_f32 = points.mapv(|x| x as f32);
1257
1258 for i in 0..n_points {
1260 for j in i..n_points {
1261 if i == j {
1262 result[[i, j]] = 0.0;
1263 } else {
1264 let p1 = points_f32.row(i).to_vec();
1265 let p2 = points_f32.row(j).to_vec();
1266 let dist = simd_euclidean_distance_f32(&p1, &p2)? as f64;
1267 result[[i, j]] = dist;
1268 result[[j, i]] = dist;
1269 }
1270 }
1271 }
1272 } else {
1273 let optimizer = super::hardware_specific_simd::HardwareOptimizedDistances::new();
1275 result = optimizer.batch_distance_matrix_optimized(points)?;
1276 }
1277
1278 Ok(result)
1279 }
1280}
1281
1282pub mod bench {
1284 use super::mixed_precision_simd::simd_euclidean_distance_batch_f32;
1285 use crate::simd_euclidean_distance_batch;
1286 use scirs2_core::ndarray::ArrayView2;
1287 use scirs2_core::simd_ops::PlatformCapabilities;
1288 use std::time::Instant;
1289
1290 pub fn benchmark_distance_computation(
1292 points1: &ArrayView2<'_, f64>,
1293 points2: &ArrayView2<'_, f64>,
1294 iterations: usize,
1295 ) -> BenchmarkResults {
1296 let mut results = BenchmarkResults::default();
1297
1298 let start = Instant::now();
1300 for _ in 0..iterations {
1301 for (row1, row2) in points1.outer_iter().zip(points2.outer_iter()) {
1302 let _dist = crate::distance::euclidean(
1303 row1.as_slice().expect("Operation failed"),
1304 row2.as_slice().expect("Operation failed"),
1305 );
1306 }
1307 }
1308 results.scalar_time = start.elapsed().as_secs_f64();
1309
1310 let start = Instant::now();
1312 for _ in 0..iterations {
1313 let _distances =
1314 simd_euclidean_distance_batch(points1, points2).expect("Operation failed");
1315 }
1316 results.simd_f64_time = start.elapsed().as_secs_f64();
1317
1318 if points1.ncols() <= 16 {
1320 let points1_f32 = points1.mapv(|x| x as f32);
1322 let points2_f32 = points2.mapv(|x| x as f32);
1323
1324 let start = Instant::now();
1325 for _ in 0..iterations {
1326 let _distances =
1327 simd_euclidean_distance_batch_f32(&points1_f32.view(), &points2_f32.view())
1328 .expect("Operation failed");
1329 }
1330 results.simd_f32_time = Some(start.elapsed().as_secs_f64());
1331 }
1332
1333 results.compute_speedups();
1334 results
1335 }
1336
1337 #[derive(Debug, Default)]
1339 pub struct BenchmarkResults {
1340 pub scalar_time: f64,
1341 pub simd_f64_time: f64,
1342 pub simd_f32_time: Option<f64>,
1343 pub simd_f64_speedup: f64,
1344 pub simd_f32_speedup: Option<f64>,
1345 }
1346
1347 impl BenchmarkResults {
1348 fn compute_speedups(&mut self) {
1349 if self.simd_f64_time > 0.0 {
1350 self.simd_f64_speedup = self.scalar_time / self.simd_f64_time;
1351 }
1352
1353 if let Some(f32_time) = self.simd_f32_time {
1354 if f32_time > 0.0 {
1355 self.simd_f32_speedup = Some(self.scalar_time / f32_time);
1356 }
1357 }
1358 }
1359
1360 pub fn report(&self) {
1362 println!("Advanced-SIMD Performance Benchmark Results:");
1363 println!(" Scalar time: {:.6} seconds", self.scalar_time);
1364 println!(
1365 " SIMD f64 time: {:.6} seconds ({:.2}x speedup)",
1366 self.simd_f64_time, self.simd_f64_speedup
1367 );
1368
1369 if let (Some(f32_time), Some(f32_speedup)) = (self.simd_f32_time, self.simd_f32_speedup)
1370 {
1371 println!(" SIMD f32 time: {f32_time:.6} seconds ({f32_speedup:.2}x speedup)");
1372 }
1373 }
1374 }
1375
1376 pub fn report_simd_features() {
1378 println!("Advanced-SIMD Features Available:");
1379
1380 let caps = PlatformCapabilities::detect();
1381 println!(" SIMD Available: {}", caps.simd_available);
1382 println!(" GPU Available: {}", caps.gpu_available);
1383
1384 if caps.simd_available {
1385 println!(" AVX2: {}", caps.avx2_available);
1386 println!(" AVX512: {}", caps.avx512_available);
1387 println!(" NEON: {}", caps.neon_available);
1388 println!(" FMA Support: {}", caps.simd_available);
1389 }
1390
1391 let theoretical_speedup = if caps.avx512_available {
1393 8.0
1394 } else if caps.avx2_available || caps.neon_available {
1395 4.0
1396 } else {
1397 2.0
1398 };
1399
1400 println!(" Theoretical Max Speedup: {theoretical_speedup:.1}x");
1401 }
1402}
1403
1404#[cfg(test)]
1405mod tests {
1406 use super::hardware_specific_simd::HardwareOptimizedDistances;
1407 use super::{
1408 linear_to_condensed_indices, parallel_cdist, parallel_pdist, simd_chebyshev_distance,
1409 simd_euclidean_distance, simd_euclidean_distance_batch, simd_knn_search,
1410 simd_manhattan_distance,
1411 };
1412 use approx::assert_relative_eq;
1413 use scirs2_core::ndarray::array;
1414
1415 #[test]
1416 fn test_simd_euclidean_distance() {
1417 let a = vec![1.0, 2.0, 3.0];
1418 let b = vec![4.0, 5.0, 6.0];
1419
1420 let simd_dist = simd_euclidean_distance(&a, &b).expect("Operation failed");
1421 let scalar_dist = crate::distance::euclidean(&a, &b);
1422
1423 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1424 }
1425
1426 #[test]
1427 fn test_simd_manhattan_distance() {
1428 let a = vec![1.0, 2.0, 3.0];
1429 let b = vec![4.0, 5.0, 6.0];
1430
1431 let simd_dist = simd_manhattan_distance(&a, &b).expect("Operation failed");
1432 let scalar_dist = crate::distance::manhattan(&a, &b);
1433
1434 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1435 }
1436
1437 #[test]
1438 fn test_simd_batch_distance() {
1439 let points1 = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
1440 let points2 = array![[2.0, 3.0], [4.0, 5.0], [6.0, 7.0]];
1441
1442 let distances = simd_euclidean_distance_batch(&points1.view(), &points2.view())
1443 .expect("Operation failed");
1444
1445 assert_eq!(distances.len(), 3);
1446 for &dist in distances.iter() {
1447 assert!(dist > 0.0);
1448 assert!(dist.is_finite());
1449 }
1450
1451 for i in 0..3 {
1453 let p1 = points1.row(i).to_vec();
1454 let p2 = points2.row(i).to_vec();
1455 let expected = crate::distance::euclidean(&p1, &p2);
1456 assert_relative_eq!(distances[i], expected, epsilon = 1e-10);
1457 }
1458 }
1459
1460 #[test]
1461 fn test_parallel_pdist() {
1462 let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
1463
1464 let distances = parallel_pdist(&points.view(), "euclidean").expect("Operation failed");
1465
1466 assert_eq!(distances.len(), 6);
1468
1469 for &dist in distances.iter() {
1471 assert!(dist > 0.0);
1472 assert!(dist.is_finite());
1473 }
1474 }
1475
1476 #[test]
1477 fn test_parallel_cdist() {
1478 let points1 = array![[0.0, 0.0], [1.0, 1.0]];
1479 let points2 = array![[1.0, 0.0], [0.0, 1.0], [2.0, 2.0]];
1480
1481 let distances = parallel_cdist(&points1.view(), &points2.view(), "euclidean")
1482 .expect("Operation failed");
1483
1484 assert_eq!(distances.shape(), &[2, 3]);
1485
1486 for &dist in distances.iter() {
1488 assert!(dist >= 0.0);
1489 assert!(dist.is_finite());
1490 }
1491 }
1492
1493 #[test]
1494 fn test_simd_knn_search() {
1495 let data_points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [2.0, 2.0]];
1496 let query_points = array![[0.5, 0.5], [1.5, 1.5]];
1497
1498 let (indices, distances) =
1499 simd_knn_search(&query_points.view(), &data_points.view(), 3, "euclidean")
1500 .expect("Operation failed");
1501
1502 assert_eq!(indices.shape(), &[2, 3]);
1503 assert_eq!(distances.shape(), &[2, 3]);
1504
1505 for row in distances.outer_iter() {
1507 for i in 1..row.len() {
1508 assert!(row[i] >= row[i - 1]);
1509 }
1510 }
1511
1512 for &idx in indices.iter() {
1514 assert!(idx < data_points.nrows());
1515 }
1516 }
1517
1518 #[test]
1519 fn test_linear_to_condensed_indices() {
1520 let n = 4;
1522 let expected_pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)];
1523
1524 for (linear_idx, expected) in expected_pairs.iter().enumerate() {
1525 let result = linear_to_condensed_indices(linear_idx, n);
1526 assert_eq!(result, *expected);
1527 }
1528 }
1529
1530 #[test]
1531 fn test_simd_chebyshev_distance() {
1532 let a = vec![1.0, 2.0, 3.0];
1533 let b = vec![4.0, 5.0, 1.0];
1534
1535 let dist = simd_chebyshev_distance(&a, &b).expect("Operation failed");
1536
1537 assert_relative_eq!(dist, 3.0, epsilon = 1e-10);
1539 }
1540
1541 #[test]
1542 fn test_different_metrics() {
1543 let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]];
1544
1545 let metrics = ["euclidean", "manhattan", "sqeuclidean", "chebyshev"];
1546
1547 for metric in &metrics {
1548 let distances = parallel_pdist(&points.view(), metric).expect("Operation failed");
1549 assert_eq!(distances.len(), 3); for &dist in distances.iter() {
1552 assert!(dist >= 0.0);
1553 assert!(dist.is_finite());
1554 }
1555 }
1556 }
1557
1558 #[test]
1559 fn test_error_handling() {
1560 let a = vec![1.0, 2.0];
1561 let b = vec![1.0, 2.0, 3.0]; let result = simd_euclidean_distance(&a, &b);
1564 assert!(result.is_err());
1565
1566 let result = simd_manhattan_distance(&a, &b);
1567 assert!(result.is_err());
1568 }
1569
1570 #[test]
1571 fn test_large_dimension_vectors() {
1572 let a = vec![0.0, 1.0, 2.0];
1574 let b = vec![1.0, 2.0, 3.0];
1575
1576 let simd_dist = simd_euclidean_distance(&a, &b).expect("Operation failed");
1577 let scalar_dist = crate::distance::euclidean(&a, &b);
1578
1579 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1581
1582 let dim = 1000; let a: Vec<f64> = (0..dim).map(|i| i as f64).collect();
1585 let b: Vec<f64> = (0..dim).map(|i| (i + 1) as f64).collect();
1586
1587 let simd_dist = simd_euclidean_distance(&a, &b).expect("Operation failed");
1588 let scalar_dist = crate::distance::euclidean(&a, &b);
1589
1590 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1592 }
1593
1594 #[test]
1595 fn test_hardware_optimized_distances() {
1596 use super::hardware_specific_simd::HardwareOptimizedDistances;
1597
1598 let optimizer = HardwareOptimizedDistances::new();
1599
1600 let a = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
1601 let b = array![2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
1602
1603 let optimized_dist = optimizer
1604 .euclidean_distance_optimized(&a.view(), &b.view())
1605 .expect("Operation failed");
1606 let scalar_dist = crate::distance::euclidean(
1607 a.as_slice().expect("Operation failed"),
1608 b.as_slice().expect("Operation failed"),
1609 );
1610
1611 assert_relative_eq!(optimized_dist, scalar_dist, epsilon = 1e-10);
1612 }
1613
1614 #[test]
1615 fn test_hardware_optimized_batch_matrix() {
1616 let points = array![
1617 [0.0, 0.0],
1618 [1.0, 0.0],
1619 [0.0, 1.0],
1620 [1.0, 1.0],
1621 [2.0, 2.0],
1622 [3.0, 3.0],
1623 [4.0, 4.0],
1624 [5.0, 5.0]
1625 ];
1626
1627 let optimizer = HardwareOptimizedDistances::new();
1628 let result = optimizer.batch_distance_matrix_optimized(&points.view());
1629
1630 assert!(result.is_ok());
1631 let matrix = result.expect("Operation failed");
1632 assert_eq!(matrix.dim(), (8, 8));
1633
1634 for i in 0..8 {
1636 assert_relative_eq!(matrix[[i, i]], 0.0, epsilon = 1e-10);
1637 }
1638
1639 for i in 0..8 {
1641 for j in 0..8 {
1642 assert_relative_eq!(matrix[[i, j]], matrix[[j, i]], epsilon = 1e-10);
1643 }
1644 }
1645 }
1646
1647 #[test]
1648 fn test_hardware_optimized_knn() {
1649 let data_points = array![
1650 [0.0, 0.0],
1651 [1.0, 0.0],
1652 [0.0, 1.0],
1653 [1.0, 1.0],
1654 [2.0, 2.0],
1655 [3.0, 3.0],
1656 [4.0, 4.0],
1657 [5.0, 5.0]
1658 ];
1659 let query_points = array![[0.5, 0.5], [2.5, 2.5]];
1660
1661 let optimizer = HardwareOptimizedDistances::new();
1662 let result = optimizer.knn_search_vectorized(&query_points.view(), &data_points.view(), 3);
1663
1664 assert!(result.is_ok());
1665 let (indices, distances) = result.expect("Operation failed");
1666
1667 assert_eq!(indices.dim(), (2, 3));
1668 assert_eq!(distances.dim(), (2, 3));
1669
1670 for row in distances.outer_iter() {
1672 for i in 1..row.len() {
1673 assert!(row[i] >= row[i - 1]);
1674 }
1675 }
1676 }
1677
1678 #[test]
1679 fn test_mixed_precision_adaptive() {
1680 use super::mixed_precision_simd::adaptive_precision_distance_matrix;
1681
1682 let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
1684
1685 let result = adaptive_precision_distance_matrix(&points.view(), 1e6);
1686 assert!(result.is_ok());
1687
1688 let matrix = result.expect("Operation failed");
1689 assert_eq!(matrix.dim(), (4, 4));
1690
1691 for i in 0..4 {
1693 assert_relative_eq!(matrix[[i, i]], 0.0, epsilon = 1e-6);
1694 }
1695 }
1696
1697 #[test]
1698 fn test_simd_capabilities_reporting() {
1699 let optimizer = HardwareOptimizedDistances::new();
1700
1701 optimizer.report_capabilities();
1703
1704 let block_size = optimizer.optimal_simd_block_size();
1705 assert!((2..=8).contains(&block_size));
1706 }
1707}