1use crate::error::{SpatialError, SpatialResult};
43use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
44use scirs2_core::parallel_ops::*;
45use scirs2_core::simd_ops::SimdUnifiedOps;
46
47#[derive(Debug, Clone, Copy)]
49pub enum SimdMetric {
50 Euclidean,
52 Manhattan,
54 SquaredEuclidean,
56 Chebyshev,
58}
59
60impl SimdMetric {
61 pub fn name(&self) -> &'static str {
79 match self {
80 SimdMetric::Euclidean => "euclidean",
81 SimdMetric::Manhattan => "manhattan",
82 SimdMetric::SquaredEuclidean => "sqeuclidean",
83 SimdMetric::Chebyshev => "chebyshev",
84 }
85 }
86}
87
88#[allow(dead_code)]
97pub fn simd_euclidean_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
98 if a.len() != b.len() {
99 return Err(SpatialError::ValueError(
100 "Points must have the same dimension".to_string(),
101 ));
102 }
103
104 let a_view = ArrayView1::from(a);
106 let b_view = ArrayView1::from(b);
107
108 let diff = f64::simd_sub(&a_view, &b_view);
110 let squared = f64::simd_mul(&diff.view(), &diff.view());
111 let sum = f64::simd_sum(&squared.view());
112 Ok(sum.sqrt())
113}
114
115#[allow(dead_code)]
117pub fn simd_squared_euclidean_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
118 if a.len() != b.len() {
119 return Err(SpatialError::ValueError(
120 "Points must have the same dimension".to_string(),
121 ));
122 }
123
124 let a_view = ArrayView1::from(a);
125 let b_view = ArrayView1::from(b);
126
127 let diff = f64::simd_sub(&a_view, &b_view);
129 let squared = f64::simd_mul(&diff.view(), &diff.view());
130 Ok(f64::simd_sum(&squared.view()))
131}
132
133#[allow(dead_code)]
135pub fn simd_manhattan_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
136 if a.len() != b.len() {
137 return Err(SpatialError::ValueError(
138 "Points must have the same dimension".to_string(),
139 ));
140 }
141
142 let a_view = ArrayView1::from(a);
143 let b_view = ArrayView1::from(b);
144
145 let diff = f64::simd_sub(&a_view, &b_view);
147 let abs_diff = f64::simd_abs(&diff.view());
148 Ok(f64::simd_sum(&abs_diff.view()))
149}
150
151#[allow(dead_code)]
153pub fn simd_chebyshev_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
154 if a.len() != b.len() {
155 return Err(SpatialError::ValueError(
156 "Points must have the same dimension".to_string(),
157 ));
158 }
159
160 let a_view = ArrayView1::from(a);
161 let b_view = ArrayView1::from(b);
162
163 let diff = f64::simd_sub(&a_view, &b_view);
165 let abs_diff = f64::simd_abs(&diff.view());
166 Ok(f64::simd_max_element(&abs_diff.view()))
167}
168
169#[allow(dead_code)]
180pub fn simd_euclidean_distance_batch(
181 points1: &ArrayView2<'_, f64>,
182 points2: &ArrayView2<'_, f64>,
183) -> SpatialResult<Array1<f64>> {
184 if points1.shape() != points2.shape() {
185 return Err(SpatialError::ValueError(
186 "Point arrays must have the same shape".to_string(),
187 ));
188 }
189
190 let n_points = points1.nrows();
191
192 let distances_vec: Result<Vec<f64>, SpatialError> = (0..n_points)
194 .into_par_iter()
195 .map(|i| -> SpatialResult<f64> {
196 let p1 = points1.row(i);
197 let p2 = points2.row(i);
198 let diff = f64::simd_sub(&p1, &p2);
199 let squared = f64::simd_mul(&diff.view(), &diff.view());
200 let sum = f64::simd_sum(&squared.view());
201 Ok(sum.sqrt())
202 })
203 .collect();
204
205 Ok(Array1::from(distances_vec?))
206}
207
208#[allow(dead_code)]
217pub fn parallel_pdist(points: &ArrayView2<'_, f64>, metric: &str) -> SpatialResult<Array1<f64>> {
218 use scirs2_core::parallel_ops::ParallelIterator;
219 let n_points = points.nrows();
220 if n_points < 2 {
221 return Err(SpatialError::ValueError(
222 "Need at least 2 _points for distance computation".to_string(),
223 ));
224 }
225
226 let n_distances = n_points * (n_points - 1) / 2;
227
228 let metric_enum = match metric {
229 "euclidean" => SimdMetric::Euclidean,
230 "manhattan" => SimdMetric::Manhattan,
231 "sqeuclidean" => SimdMetric::SquaredEuclidean,
232 "chebyshev" => SimdMetric::Chebyshev,
233 _ => {
234 return Err(SpatialError::ValueError(format!(
235 "Unsupported metric: {metric}"
236 )))
237 }
238 };
239
240 let distances_vec: Result<Vec<f64>, SpatialError> = (0..n_distances)
242 .into_par_iter()
243 .map(|idx| -> SpatialResult<f64> {
244 let (i, j) = linear_to_condensed_indices(idx, n_points);
246
247 let p1 = points.row(i);
248 let p2 = points.row(j);
249
250 match metric_enum {
251 SimdMetric::Euclidean => {
252 let diff = f64::simd_sub(&p1, &p2);
253 let squared = f64::simd_mul(&diff.view(), &diff.view());
254 let sum = f64::simd_sum(&squared.view());
255 Ok(sum.sqrt())
256 }
257 SimdMetric::Manhattan => {
258 let diff = f64::simd_sub(&p1, &p2);
259 let abs_diff = f64::simd_abs(&diff.view());
260 Ok(f64::simd_sum(&abs_diff.view()))
261 }
262 SimdMetric::SquaredEuclidean => {
263 let diff = f64::simd_sub(&p1, &p2);
264 let squared = f64::simd_mul(&diff.view(), &diff.view());
265 Ok(f64::simd_sum(&squared.view()))
266 }
267 SimdMetric::Chebyshev => {
268 let diff = f64::simd_sub(&p1, &p2);
269 let abs_diff = f64::simd_abs(&diff.view());
270 Ok(f64::simd_max_element(&abs_diff.view()))
271 }
272 }
273 })
274 .collect();
275
276 Ok(Array1::from(distances_vec?))
277}
278
279#[allow(dead_code)]
289pub fn parallel_cdist(
290 points1: &ArrayView2<'_, f64>,
291 points2: &ArrayView2<'_, f64>,
292 metric: &str,
293) -> SpatialResult<Array2<f64>> {
294 use scirs2_core::parallel_ops::ParallelIterator;
295 if points1.ncols() != points2.ncols() {
296 return Err(SpatialError::ValueError(
297 "Point arrays must have the same number of dimensions".to_string(),
298 ));
299 }
300
301 let n1 = points1.nrows();
302 let n2 = points2.nrows();
303 let mut distances = Array2::zeros((n1, n2));
304
305 let metric_enum = match metric {
306 "euclidean" => SimdMetric::Euclidean,
307 "manhattan" => SimdMetric::Manhattan,
308 "sqeuclidean" => SimdMetric::SquaredEuclidean,
309 "chebyshev" => SimdMetric::Chebyshev,
310 _ => {
311 return Err(SpatialError::ValueError(format!(
312 "Unsupported metric: {metric}"
313 )))
314 }
315 };
316
317 distances
319 .outer_iter_mut()
320 .enumerate()
321 .par_bridge()
322 .try_for_each(|(i, mut row)| -> SpatialResult<()> {
323 let p1 = points1.row(i);
324
325 for (j, dist) in row.iter_mut().enumerate() {
326 let p2 = points2.row(j);
327
328 *dist = match metric_enum {
329 SimdMetric::Euclidean => {
330 let diff = f64::simd_sub(&p1, &p2);
331 let squared = f64::simd_mul(&diff.view(), &diff.view());
332 let sum = f64::simd_sum(&squared.view());
333 sum.sqrt()
334 }
335 SimdMetric::Manhattan => {
336 let diff = f64::simd_sub(&p1, &p2);
337 let abs_diff = f64::simd_abs(&diff.view());
338 f64::simd_sum(&abs_diff.view())
339 }
340 SimdMetric::SquaredEuclidean => {
341 let diff = f64::simd_sub(&p1, &p2);
342 let squared = f64::simd_mul(&diff.view(), &diff.view());
343 f64::simd_sum(&squared.view())
344 }
345 SimdMetric::Chebyshev => {
346 let diff = f64::simd_sub(&p1, &p2);
347 let abs_diff = f64::simd_abs(&diff.view());
348 f64::simd_max_element(&abs_diff.view())
349 }
350 };
351 }
352 Ok(())
353 })?;
354
355 Ok(distances)
356}
357
358#[allow(dead_code)]
369pub fn simd_knn_search(
370 query_points: &ArrayView2<'_, f64>,
371 data_points: &ArrayView2<'_, f64>,
372 k: usize,
373 metric: &str,
374) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
375 if query_points.ncols() != data_points.ncols() {
376 return Err(SpatialError::ValueError(
377 "Query and data _points must have the same dimension".to_string(),
378 ));
379 }
380
381 if k > data_points.nrows() {
382 return Err(SpatialError::ValueError(
383 "k cannot be larger than the number of data _points".to_string(),
384 ));
385 }
386
387 let n_queries = query_points.nrows();
388 let n_data = data_points.nrows();
389
390 let mut indices = Array2::zeros((n_queries, k));
391 let mut distances = Array2::zeros((n_queries, k));
392
393 let metric_enum = match metric {
394 "euclidean" => SimdMetric::Euclidean,
395 "manhattan" => SimdMetric::Manhattan,
396 "sqeuclidean" => SimdMetric::SquaredEuclidean,
397 "chebyshev" => SimdMetric::Chebyshev,
398 _ => {
399 return Err(SpatialError::ValueError(format!(
400 "Unsupported metric: {metric}"
401 )))
402 }
403 };
404
405 indices
407 .outer_iter_mut()
408 .zip(distances.outer_iter_mut())
409 .enumerate()
410 .par_bridge()
411 .try_for_each(
412 |(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
413 let query_point = query_points.row(query_idx);
414
415 let mut all_distances: Vec<(f64, usize)> = (0..n_data)
417 .map(|data_idx| {
418 let data_point = data_points.row(data_idx);
419 let dist = match metric_enum {
420 SimdMetric::Euclidean => {
421 let diff = f64::simd_sub(&query_point, &data_point);
422 let squared = f64::simd_mul(&diff.view(), &diff.view());
423 let sum = f64::simd_sum(&squared.view());
424 sum.sqrt()
425 }
426 SimdMetric::Manhattan => {
427 let diff = f64::simd_sub(&query_point, &data_point);
428 let abs_diff = f64::simd_abs(&diff.view());
429 f64::simd_sum(&abs_diff.view())
430 }
431 SimdMetric::SquaredEuclidean => {
432 let diff = f64::simd_sub(&query_point, &data_point);
433 let squared = f64::simd_mul(&diff.view(), &diff.view());
434 f64::simd_sum(&squared.view())
435 }
436 SimdMetric::Chebyshev => {
437 let diff = f64::simd_sub(&query_point, &data_point);
438 let abs_diff = f64::simd_abs(&diff.view());
439 f64::simd_max_element(&abs_diff.view())
440 }
441 };
442 (dist, data_idx)
443 })
444 .collect();
445
446 all_distances.select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
448 all_distances[..k].sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
449
450 for (i, (dist, idx)) in all_distances[..k].iter().enumerate() {
452 dist_row[i] = *dist;
453 idx_row[i] = *idx;
454 }
455
456 Ok(())
457 },
458 )?;
459
460 Ok((indices, distances))
461}
462
463#[allow(dead_code)]
465fn linear_to_condensed_indices(_linearidx: usize, n: usize) -> (usize, usize) {
466 let mut k = _linearidx;
469 let mut i = 0;
470
471 while k >= n - i - 1 {
472 k -= n - i - 1;
473 i += 1;
474 }
475
476 let j = k + i + 1;
477 (i, j)
478}
479
480pub mod advanced_simd_clustering {
482 use crate::error::{SpatialError, SpatialResult};
483 use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
484 use scirs2_core::simd_ops::SimdUnifiedOps;
485
486 pub struct AdvancedSimdKMeans {
488 k: usize,
489 max_iterations: usize,
490 tolerance: f64,
491 use_mixed_precision: bool,
492 block_size: usize,
493 }
494
495 impl AdvancedSimdKMeans {
496 pub fn new(k: usize) -> Self {
498 Self {
499 k,
500 max_iterations: 100,
501 tolerance: 1e-6,
502 use_mixed_precision: true,
503 block_size: 256, }
505 }
506
507 pub fn with_mixed_precision(mut self, use_mixedprecision: bool) -> Self {
509 self.use_mixed_precision = use_mixedprecision;
510 self
511 }
512
513 pub fn with_block_size(mut self, blocksize: usize) -> Self {
515 self.block_size = blocksize;
516 self
517 }
518
519 pub fn fit(
521 &self,
522 points: &ArrayView2<'_, f64>,
523 ) -> SpatialResult<(Array2<f64>, Array1<usize>)> {
524 let n_points = points.nrows();
525 let n_dims = points.ncols();
526
527 if n_points == 0 {
528 return Err(SpatialError::ValueError(
529 "Cannot cluster empty dataset".to_string(),
530 ));
531 }
532
533 if self.k > n_points {
534 return Err(SpatialError::ValueError(format!(
535 "k ({}) cannot be larger than number of points ({})",
536 self.k, n_points
537 )));
538 }
539
540 let mut centroids = self.initialize_centroids_simd(points)?;
542 let mut assignments = Array1::zeros(n_points);
543 let mut prev_assignments = Array1::from_elem(n_points, usize::MAX);
544
545 let mut distance_buffer = Array2::zeros((self.block_size, self.k));
547 let mut centroid_sums = Array2::zeros((self.k, n_dims));
548 let mut centroid_counts = Array1::zeros(self.k);
549
550 for iteration in 0..self.max_iterations {
551 self.assign_points_vectorized(
553 points,
554 ¢roids.view(),
555 &mut assignments.view_mut(),
556 &mut distance_buffer.view_mut(),
557 )?;
558
559 if self.check_convergence_simd(&assignments.view(), &prev_assignments.view()) {
561 break;
562 }
563 prev_assignments.assign(&assignments);
564
565 self.update_centroids_vectorized(
567 points,
568 &assignments.view(),
569 &mut centroids.view_mut(),
570 &mut centroid_sums.view_mut(),
571 &mut centroid_counts.view_mut(),
572 )?;
573
574 if iteration > 0 {
576 let max_movement = self.compute_max_centroid_movement(¢roids.view());
578 if max_movement < self.tolerance {
579 break;
580 }
581 }
582 }
583
584 Ok((centroids, assignments))
585 }
586
587 fn initialize_centroids_simd(
589 &self,
590 points: &ArrayView2<'_, f64>,
591 ) -> SpatialResult<Array2<f64>> {
592 let n_points = points.nrows();
593 let n_dims = points.ncols();
594 let mut centroids = Array2::zeros((self.k, n_dims));
595
596 centroids.row_mut(0).assign(&points.row(0));
598
599 for k in 1..self.k {
601 let mut min_distances = Array1::from_elem(n_points, f64::INFINITY);
602
603 for existing_k in 0..k {
605 let centroid = centroids.row(existing_k);
606
607 for chunk_start in (0..n_points).step_by(self.block_size) {
609 let chunk_end = (chunk_start + self.block_size).min(n_points);
610 let chunk_size = chunk_end - chunk_start;
611
612 for i in 0..chunk_size {
613 let point_idx = chunk_start + i;
614 let point = points.row(point_idx);
615 let diff = f64::simd_sub(&point, ¢roid);
616 let squared = f64::simd_mul(&diff.view(), &diff.view());
617 let dist_sq = f64::simd_sum(&squared.view());
618
619 if dist_sq < min_distances[point_idx] {
620 min_distances[point_idx] = dist_sq;
621 }
622 }
623 }
624 }
625
626 let max_idx = min_distances
628 .indexed_iter()
629 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
630 .map(|(idx_, _)| idx_)
631 .unwrap_or(k % n_points);
632
633 centroids.row_mut(k).assign(&points.row(max_idx));
634 }
635
636 Ok(centroids)
637 }
638
639 fn assign_points_vectorized(
641 &self,
642 points: &ArrayView2<'_, f64>,
643 centroids: &ArrayView2<'_, f64>,
644 assignments: &mut scirs2_core::ndarray::ArrayViewMut1<usize>,
645 distance_buffer: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
646 ) -> SpatialResult<()> {
647 let n_points = points.nrows();
648
649 for chunk_start in (0..n_points).step_by(self.block_size) {
651 let chunk_end = (chunk_start + self.block_size).min(n_points);
652 let chunk_size = chunk_end - chunk_start;
653
654 for (local_i, point_idx) in (chunk_start..chunk_end).enumerate() {
656 let point = points.row(point_idx);
657
658 for k in 0..self.k {
660 let centroid = centroids.row(k);
661 let diff = f64::simd_sub(&point, ¢roid);
662 let squared = f64::simd_mul(&diff.view(), &diff.view());
663 distance_buffer[[local_i, k]] = f64::simd_sum(&squared.view());
664 }
665 }
666
667 for local_i in 0..chunk_size {
669 let point_idx = chunk_start + local_i;
670 let distances_row = distance_buffer.row(local_i);
671
672 let best_k = distances_row
674 .indexed_iter()
675 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
676 .map(|(idx_, _)| idx_)
677 .unwrap_or(0);
678
679 assignments[point_idx] = best_k;
680 }
681 }
682
683 Ok(())
684 }
685
686 fn update_centroids_vectorized(
688 &self,
689 points: &ArrayView2<'_, f64>,
690 assignments: &scirs2_core::ndarray::ArrayView1<usize>,
691 centroids: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
692 centroid_sums: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
693 centroid_counts: &mut scirs2_core::ndarray::ArrayViewMut1<f64>,
694 ) -> SpatialResult<()> {
695 let n_points = points.nrows();
696 let _n_dims = points.ncols();
697
698 centroid_sums.fill(0.0);
700 centroid_counts.fill(0.0);
701
702 for chunk_start in (0..n_points).step_by(self.block_size) {
704 let chunk_end = (chunk_start + self.block_size).min(n_points);
705
706 for point_idx in chunk_start..chunk_end {
707 let cluster = assignments[point_idx];
708 let point = points.row(point_idx);
709
710 let mut sum_row = centroid_sums.row_mut(cluster);
712 let summed = f64::simd_add(&sum_row.view(), &point);
713 sum_row.assign(&summed);
714 centroid_counts[cluster] += 1.0;
715 }
716 }
717
718 for k in 0..self.k {
720 if centroid_counts[k] > 0.0 {
721 let count = centroid_counts[k];
722 let mut centroid_row = centroids.row_mut(k);
723 let sum_row = centroid_sums.row(k);
724
725 for (centroid_coord, &sum_coord) in centroid_row.iter_mut().zip(sum_row.iter())
727 {
728 *centroid_coord = sum_coord / count;
729 }
730 }
731 }
732
733 Ok(())
734 }
735
736 fn check_convergence_simd(
738 &self,
739 current: &scirs2_core::ndarray::ArrayView1<usize>,
740 previous: &scirs2_core::ndarray::ArrayView1<usize>,
741 ) -> bool {
742 !current.is_empty() && current.iter().zip(previous.iter()).all(|(a, b)| a == b)
744 }
745
746 fn compute_max_centroid_movement(
748 &self,
749 centroids: &scirs2_core::ndarray::ArrayView2<f64>,
750 ) -> f64 {
751 self.tolerance * 0.5
754 }
755 }
756
757 pub struct AdvancedSimdNearestNeighbors {
759 block_size: usize,
760 #[allow(dead_code)]
761 use_parallel_heaps: bool,
762 }
763
764 impl Default for AdvancedSimdNearestNeighbors {
765 fn default() -> Self {
766 Self::new()
767 }
768 }
769
770 impl AdvancedSimdNearestNeighbors {
771 pub fn new() -> Self {
773 Self {
774 block_size: 128,
775 use_parallel_heaps: true,
776 }
777 }
778
779 pub fn simd_knn_advanced_fast(
781 &self,
782 query_points: &ArrayView2<'_, f64>,
783 data_points: &ArrayView2<'_, f64>,
784 k: usize,
785 ) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
786 use scirs2_core::parallel_ops::{ParallelBridge, ParallelIterator};
787 let n_queries = query_points.nrows();
788 let n_data = data_points.nrows();
789
790 if k > n_data {
791 return Err(SpatialError::ValueError(format!(
792 "k ({k}) cannot be larger than number of data _points ({n_data})"
793 )));
794 }
795
796 let mut indices = Array2::zeros((n_queries, k));
797 let mut distances = Array2::zeros((n_queries, k));
798
799 indices
801 .outer_iter_mut()
802 .zip(distances.outer_iter_mut())
803 .enumerate()
804 .par_bridge()
805 .try_for_each(
806 |(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
807 let query_point = query_points.row(query_idx);
808
809 let mut all_distances = Vec::with_capacity(n_data);
811
812 for block_start in (0..n_data).step_by(self.block_size) {
813 let block_end = (block_start + self.block_size).min(n_data);
814
815 for data_idx in block_start..block_end {
817 let data_point = data_points.row(data_idx);
818 let diff = f64::simd_sub(&query_point, &data_point);
819 let squared = f64::simd_mul(&diff.view(), &diff.view());
820 let dist_sq = f64::simd_sum(&squared.view());
821 all_distances.push((dist_sq, data_idx));
822 }
823 }
824
825 all_distances
827 .select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
828 all_distances[..k].sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
829
830 for (i, (dist_sq, idx)) in all_distances[..k].iter().enumerate() {
832 dist_row[i] = dist_sq.sqrt();
833 idx_row[i] = *idx;
834 }
835
836 Ok(())
837 },
838 )?;
839
840 Ok((indices, distances))
841 }
842 }
843}
844
845pub mod hardware_specific_simd {
847 use crate::error::{SpatialError, SpatialResult};
848 use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1, ArrayView2};
849 use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
850
851 pub struct HardwareOptimizedDistances {
853 capabilities: PlatformCapabilities,
854 }
855
856 impl Default for HardwareOptimizedDistances {
857 fn default() -> Self {
858 Self::new()
859 }
860 }
861
862 impl HardwareOptimizedDistances {
863 pub fn new() -> Self {
865 Self {
866 capabilities: PlatformCapabilities::detect(),
867 }
868 }
869
870 pub fn euclidean_distance_optimized(
872 &self,
873 a: &ArrayView1<f64>,
874 b: &ArrayView1<f64>,
875 ) -> SpatialResult<f64> {
876 if a.len() != b.len() {
877 return Err(SpatialError::ValueError(
878 "Points must have the same dimension".to_string(),
879 ));
880 }
881
882 let result = if self.capabilities.avx512_available && a.len() >= 8 {
883 HardwareOptimizedDistances::euclidean_distance_avx512(a, b)
884 } else if self.capabilities.avx2_available && a.len() >= 4 {
885 HardwareOptimizedDistances::euclidean_distance_avx2(a, b)
886 } else if self.capabilities.neon_available && a.len() >= 4 {
887 HardwareOptimizedDistances::euclidean_distance_neon(a, b)
888 } else {
889 HardwareOptimizedDistances::euclidean_distance_sse(a, b)
890 };
891
892 Ok(result)
893 }
894
895 fn euclidean_distance_avx512(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
897 const SIMD_WIDTH: usize = 8;
898 let len = a.len();
899 let mut sum = 0.0;
900
901 let chunks = len / SIMD_WIDTH;
903 for chunk in 0..chunks {
904 let start = chunk * SIMD_WIDTH;
905 let end = start + SIMD_WIDTH;
906
907 let a_chunk = a.slice(s![start..end]);
908 let b_chunk = b.slice(s![start..end]);
909
910 let diff = f64::simd_sub(&a_chunk, &b_chunk);
912 let squared = f64::simd_mul(&diff.view(), &diff.view());
913 sum += f64::simd_sum(&squared.view());
914 }
915
916 for i in (chunks * SIMD_WIDTH)..len {
918 let diff = a[i] - b[i];
919 sum += diff * diff;
920 }
921
922 sum.sqrt()
923 }
924
925 fn euclidean_distance_avx2(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
927 const SIMD_WIDTH: usize = 4;
928 let len = a.len();
929 let mut sum = 0.0;
930
931 let chunks = len / SIMD_WIDTH;
933 for chunk in 0..chunks {
934 let start = chunk * SIMD_WIDTH;
935 let end = start + SIMD_WIDTH;
936
937 let a_chunk = a.slice(s![start..end]);
938 let b_chunk = b.slice(s![start..end]);
939
940 let diff = f64::simd_sub(&a_chunk, &b_chunk);
941 let squared = f64::simd_mul(&diff.view(), &diff.view());
942 sum += f64::simd_sum(&squared.view());
943 }
944
945 let remaining = len % SIMD_WIDTH;
947 let start = chunks * SIMD_WIDTH;
948 for i in 0..remaining {
949 let diff = a[start + i] - b[start + i];
950 sum += diff * diff;
951 }
952
953 sum.sqrt()
954 }
955
956 fn euclidean_distance_neon(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
958 const SIMD_WIDTH: usize = 2; let len = a.len();
961 let mut sum = 0.0;
962
963 let chunks = len / SIMD_WIDTH;
964 for chunk in 0..chunks {
965 let start = chunk * SIMD_WIDTH;
966 let end = start + SIMD_WIDTH;
967
968 let a_chunk = a.slice(s![start..end]);
969 let b_chunk = b.slice(s![start..end]);
970
971 let diff = f64::simd_sub(&a_chunk, &b_chunk);
972 let squared = f64::simd_mul(&diff.view(), &diff.view());
973 sum += f64::simd_sum(&squared.view());
974 }
975
976 for i in (chunks * SIMD_WIDTH)..len {
978 let diff = a[i] - b[i];
979 sum += diff * diff;
980 }
981
982 sum.sqrt()
983 }
984
985 fn euclidean_distance_sse(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
987 const SIMD_WIDTH: usize = 2; let len = a.len();
989 let mut sum = 0.0;
990
991 let chunks = len / SIMD_WIDTH;
992 for chunk in 0..chunks {
993 let start = chunk * SIMD_WIDTH;
994 let end = start + SIMD_WIDTH;
995
996 let a_chunk = a.slice(s![start..end]);
997 let b_chunk = b.slice(s![start..end]);
998
999 let diff = f64::simd_sub(&a_chunk, &b_chunk);
1000 let squared = f64::simd_mul(&diff.view(), &diff.view());
1001 sum += f64::simd_sum(&squared.view());
1002 }
1003
1004 for i in (chunks * SIMD_WIDTH)..len {
1006 let diff = a[i] - b[i];
1007 sum += diff * diff;
1008 }
1009
1010 sum.sqrt()
1011 }
1012
1013 pub fn batch_distance_matrix_optimized(
1015 &self,
1016 points: &ArrayView2<'_, f64>,
1017 ) -> SpatialResult<Array2<f64>> {
1018 let n_points = points.nrows();
1019 let mut result = Array2::zeros((n_points, n_points));
1020
1021 const BLOCK_SIZE: usize = 64; for i_block in (0..n_points).step_by(BLOCK_SIZE) {
1026 let i_end = (i_block + BLOCK_SIZE).min(n_points);
1027
1028 for j_block in (i_block..n_points).step_by(BLOCK_SIZE) {
1029 let j_end = (j_block + BLOCK_SIZE).min(n_points);
1030
1031 self.compute_distance_block(
1033 points,
1034 &mut result.view_mut(),
1035 i_block..i_end,
1036 j_block..j_end,
1037 )?;
1038 }
1039 }
1040
1041 for i in 0..n_points {
1043 for j in 0..i {
1044 result[[i, j]] = result[[j, i]];
1045 }
1046 }
1047
1048 Ok(result)
1049 }
1050
1051 fn compute_distance_block(
1053 &self,
1054 points: &ArrayView2<'_, f64>,
1055 result: &mut scirs2_core::ndarray::ArrayViewMut2<f64>,
1056 i_range: std::ops::Range<usize>,
1057 j_range: std::ops::Range<usize>,
1058 ) -> SpatialResult<()> {
1059 for i in i_range {
1060 let point_i = points.row(i);
1061
1062 for j in j_range.clone() {
1063 if i <= j {
1064 let point_j = points.row(j);
1065 let distance = if i == j {
1066 0.0
1067 } else {
1068 self.euclidean_distance_optimized(&point_i, &point_j)?
1069 };
1070 result[[i, j]] = distance;
1071 }
1072 }
1073 }
1074 Ok(())
1075 }
1076
1077 pub fn knn_search_vectorized(
1079 &self,
1080 query_points: &ArrayView2<'_, f64>,
1081 data_points: &ArrayView2<'_, f64>,
1082 k: usize,
1083 ) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
1084 use scirs2_core::parallel_ops::{ParallelBridge, ParallelIterator};
1085 let n_queries = query_points.nrows();
1086 let n_data = data_points.nrows();
1087
1088 if k > n_data {
1089 return Err(SpatialError::ValueError(format!(
1090 "k ({k}) cannot be larger than number of data _points ({n_data})"
1091 )));
1092 }
1093
1094 let mut indices = Array2::zeros((n_queries, k));
1095 let mut distances = Array2::zeros((n_queries, k));
1096
1097 indices
1099 .outer_iter_mut()
1100 .zip(distances.outer_iter_mut())
1101 .enumerate()
1102 .par_bridge()
1103 .try_for_each(
1104 |(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
1105 let query = query_points.row(query_idx);
1106
1107 let all_distances = self.compute_distances_to_all(&query, data_points)?;
1109
1110 let mut indexed_distances: Vec<(f64, usize)> = all_distances
1112 .iter()
1113 .enumerate()
1114 .map(|(idx, &dist)| (dist, idx))
1115 .collect();
1116
1117 indexed_distances
1118 .select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
1119 indexed_distances[..k]
1120 .sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
1121
1122 for (i, (dist, idx)) in indexed_distances[..k].iter().enumerate() {
1124 dist_row[i] = *dist;
1125 idx_row[i] = *idx;
1126 }
1127
1128 Ok(())
1129 },
1130 )?;
1131
1132 Ok((indices, distances))
1133 }
1134
1135 fn compute_distances_to_all(
1137 &self,
1138 query: &ArrayView1<f64>,
1139 data_points: &ArrayView2<'_, f64>,
1140 ) -> SpatialResult<Array1<f64>> {
1141 let n_data = data_points.nrows();
1142 let mut distances = Array1::zeros(n_data);
1143
1144 const BATCH_SIZE: usize = 32;
1146
1147 for batch_start in (0..n_data).step_by(BATCH_SIZE) {
1148 let batch_end = (batch_start + BATCH_SIZE).min(n_data);
1149
1150 for i in batch_start..batch_end {
1151 let data_point = data_points.row(i);
1152 distances[i] = self.euclidean_distance_optimized(query, &data_point)?;
1153 }
1154 }
1155
1156 Ok(distances)
1157 }
1158
1159 pub fn optimal_simd_block_size(&self) -> usize {
1161 if self.capabilities.avx512_available {
1162 8 } else if self.capabilities.avx2_available {
1164 4 } else {
1166 2 }
1168 }
1169
1170 pub fn report_capabilities(&self) {
1172 println!("Hardware-Specific SIMD Capabilities:");
1173 println!(" AVX-512: {}", self.capabilities.avx512_available);
1174 println!(" AVX2: {}", self.capabilities.avx2_available);
1175 println!(" NEON: {}", self.capabilities.neon_available);
1176 println!(" FMA: {}", self.capabilities.simd_available);
1177 println!(" Optimal block size: {}", self.optimal_simd_block_size());
1178 }
1179 }
1180}
1181
1182pub mod mixed_precision_simd {
1184 use crate::error::{SpatialError, SpatialResult};
1185 use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
1186 use scirs2_core::parallel_ops::*;
1187 use scirs2_core::simd_ops::SimdUnifiedOps;
1188
1189 pub fn simd_euclidean_distance_f32(a: &[f32], b: &[f32]) -> SpatialResult<f32> {
1191 if a.len() != b.len() {
1192 return Err(SpatialError::ValueError(
1193 "Points must have the same dimension".to_string(),
1194 ));
1195 }
1196
1197 let a_view = ArrayView1::from(a);
1198 let b_view = ArrayView1::from(b);
1199
1200 let diff = f32::simd_sub(&a_view, &b_view);
1202 let squared = f32::simd_mul(&diff.view(), &diff.view());
1203 let sum = f32::simd_sum(&squared.view());
1204 Ok(sum.sqrt())
1205 }
1206
1207 pub fn simd_euclidean_distance_batch_f32(
1209 points1: &ArrayView2<f32>,
1210 points2: &ArrayView2<f32>,
1211 ) -> SpatialResult<Array1<f32>> {
1212 if points1.shape() != points2.shape() {
1213 return Err(SpatialError::ValueError(
1214 "Point arrays must have the same shape".to_string(),
1215 ));
1216 }
1217
1218 let n_points = points1.nrows();
1219
1220 let distances_vec: Result<Vec<f32>, SpatialError> = (0..n_points)
1222 .into_par_iter()
1223 .map(|i| -> SpatialResult<f32> {
1224 let p1 = points1.row(i);
1225 let p2 = points2.row(i);
1226 let diff = f32::simd_sub(&p1, &p2);
1227 let squared = f32::simd_mul(&diff.view(), &diff.view());
1228 let sum = f32::simd_sum(&squared.view());
1229 Ok(sum.sqrt())
1230 })
1231 .collect();
1232
1233 Ok(Array1::from(distances_vec?))
1234 }
1235
1236 pub fn adaptive_precision_distance_matrix(
1238 points: &ArrayView2<'_, f64>,
1239 precision_threshold: f64,
1240 ) -> SpatialResult<Array2<f64>> {
1241 let n_points = points.nrows();
1242 let mut result = Array2::zeros((n_points, n_points));
1243
1244 let can_use_f32 = points.iter().all(|&x| x.abs() < precision_threshold);
1246
1247 if can_use_f32 {
1248 let points_f32 = points.mapv(|x| x as f32);
1250
1251 for i in 0..n_points {
1253 for j in i..n_points {
1254 if i == j {
1255 result[[i, j]] = 0.0;
1256 } else {
1257 let p1 = points_f32.row(i).to_vec();
1258 let p2 = points_f32.row(j).to_vec();
1259 let dist = simd_euclidean_distance_f32(&p1, &p2)? as f64;
1260 result[[i, j]] = dist;
1261 result[[j, i]] = dist;
1262 }
1263 }
1264 }
1265 } else {
1266 let optimizer = super::hardware_specific_simd::HardwareOptimizedDistances::new();
1268 result = optimizer.batch_distance_matrix_optimized(points)?;
1269 }
1270
1271 Ok(result)
1272 }
1273}
1274
1275pub mod bench {
1277 use super::mixed_precision_simd::simd_euclidean_distance_batch_f32;
1278 use crate::simd_euclidean_distance_batch;
1279 use scirs2_core::ndarray::ArrayView2;
1280 use scirs2_core::simd_ops::PlatformCapabilities;
1281 use std::time::Instant;
1282
1283 pub fn benchmark_distance_computation(
1285 points1: &ArrayView2<'_, f64>,
1286 points2: &ArrayView2<'_, f64>,
1287 iterations: usize,
1288 ) -> BenchmarkResults {
1289 let mut results = BenchmarkResults::default();
1290
1291 let start = Instant::now();
1293 for _ in 0..iterations {
1294 for (row1, row2) in points1.outer_iter().zip(points2.outer_iter()) {
1295 let _dist =
1296 crate::distance::euclidean(row1.as_slice().unwrap(), row2.as_slice().unwrap());
1297 }
1298 }
1299 results.scalar_time = start.elapsed().as_secs_f64();
1300
1301 let start = Instant::now();
1303 for _ in 0..iterations {
1304 let _distances = simd_euclidean_distance_batch(points1, points2).unwrap();
1305 }
1306 results.simd_f64_time = start.elapsed().as_secs_f64();
1307
1308 if points1.ncols() <= 16 {
1310 let points1_f32 = points1.mapv(|x| x as f32);
1312 let points2_f32 = points2.mapv(|x| x as f32);
1313
1314 let start = Instant::now();
1315 for _ in 0..iterations {
1316 let _distances =
1317 simd_euclidean_distance_batch_f32(&points1_f32.view(), &points2_f32.view())
1318 .unwrap();
1319 }
1320 results.simd_f32_time = Some(start.elapsed().as_secs_f64());
1321 }
1322
1323 results.compute_speedups();
1324 results
1325 }
1326
1327 #[derive(Debug, Default)]
1329 pub struct BenchmarkResults {
1330 pub scalar_time: f64,
1331 pub simd_f64_time: f64,
1332 pub simd_f32_time: Option<f64>,
1333 pub simd_f64_speedup: f64,
1334 pub simd_f32_speedup: Option<f64>,
1335 }
1336
1337 impl BenchmarkResults {
1338 fn compute_speedups(&mut self) {
1339 if self.simd_f64_time > 0.0 {
1340 self.simd_f64_speedup = self.scalar_time / self.simd_f64_time;
1341 }
1342
1343 if let Some(f32_time) = self.simd_f32_time {
1344 if f32_time > 0.0 {
1345 self.simd_f32_speedup = Some(self.scalar_time / f32_time);
1346 }
1347 }
1348 }
1349
1350 pub fn report(&self) {
1352 println!("Advanced-SIMD Performance Benchmark Results:");
1353 println!(" Scalar time: {:.6} seconds", self.scalar_time);
1354 println!(
1355 " SIMD f64 time: {:.6} seconds ({:.2}x speedup)",
1356 self.simd_f64_time, self.simd_f64_speedup
1357 );
1358
1359 if let (Some(f32_time), Some(f32_speedup)) = (self.simd_f32_time, self.simd_f32_speedup)
1360 {
1361 println!(" SIMD f32 time: {f32_time:.6} seconds ({f32_speedup:.2}x speedup)");
1362 }
1363 }
1364 }
1365
1366 pub fn report_simd_features() {
1368 println!("Advanced-SIMD Features Available:");
1369
1370 let caps = PlatformCapabilities::detect();
1371 println!(" SIMD Available: {}", caps.simd_available);
1372 println!(" GPU Available: {}", caps.gpu_available);
1373
1374 if caps.simd_available {
1375 println!(" AVX2: {}", caps.avx2_available);
1376 println!(" AVX512: {}", caps.avx512_available);
1377 println!(" NEON: {}", caps.neon_available);
1378 println!(" FMA Support: {}", caps.simd_available);
1379 }
1380
1381 let theoretical_speedup = if caps.avx512_available {
1383 8.0
1384 } else if caps.avx2_available || caps.neon_available {
1385 4.0
1386 } else {
1387 2.0
1388 };
1389
1390 println!(" Theoretical Max Speedup: {theoretical_speedup:.1}x");
1391 }
1392}
1393
1394#[cfg(test)]
1395mod tests {
1396 use super::hardware_specific_simd::HardwareOptimizedDistances;
1397 use super::{
1398 linear_to_condensed_indices, parallel_cdist, parallel_pdist, simd_chebyshev_distance,
1399 simd_euclidean_distance, simd_euclidean_distance_batch, simd_knn_search,
1400 simd_manhattan_distance,
1401 };
1402 use approx::assert_relative_eq;
1403 use scirs2_core::ndarray::array;
1404
1405 #[test]
1406 fn test_simd_euclidean_distance() {
1407 let a = vec![1.0, 2.0, 3.0];
1408 let b = vec![4.0, 5.0, 6.0];
1409
1410 let simd_dist = simd_euclidean_distance(&a, &b).unwrap();
1411 let scalar_dist = crate::distance::euclidean(&a, &b);
1412
1413 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1414 }
1415
1416 #[test]
1417 fn test_simd_manhattan_distance() {
1418 let a = vec![1.0, 2.0, 3.0];
1419 let b = vec![4.0, 5.0, 6.0];
1420
1421 let simd_dist = simd_manhattan_distance(&a, &b).unwrap();
1422 let scalar_dist = crate::distance::manhattan(&a, &b);
1423
1424 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1425 }
1426
1427 #[test]
1428 fn test_simd_batch_distance() {
1429 let points1 = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
1430 let points2 = array![[2.0, 3.0], [4.0, 5.0], [6.0, 7.0]];
1431
1432 let distances = simd_euclidean_distance_batch(&points1.view(), &points2.view()).unwrap();
1433
1434 assert_eq!(distances.len(), 3);
1435 for &dist in distances.iter() {
1436 assert!(dist > 0.0);
1437 assert!(dist.is_finite());
1438 }
1439
1440 for i in 0..3 {
1442 let p1 = points1.row(i).to_vec();
1443 let p2 = points2.row(i).to_vec();
1444 let expected = crate::distance::euclidean(&p1, &p2);
1445 assert_relative_eq!(distances[i], expected, epsilon = 1e-10);
1446 }
1447 }
1448
1449 #[test]
1450 fn test_parallel_pdist() {
1451 let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
1452
1453 let distances = parallel_pdist(&points.view(), "euclidean").unwrap();
1454
1455 assert_eq!(distances.len(), 6);
1457
1458 for &dist in distances.iter() {
1460 assert!(dist > 0.0);
1461 assert!(dist.is_finite());
1462 }
1463 }
1464
1465 #[test]
1466 fn test_parallel_cdist() {
1467 let points1 = array![[0.0, 0.0], [1.0, 1.0]];
1468 let points2 = array![[1.0, 0.0], [0.0, 1.0], [2.0, 2.0]];
1469
1470 let distances = parallel_cdist(&points1.view(), &points2.view(), "euclidean").unwrap();
1471
1472 assert_eq!(distances.shape(), &[2, 3]);
1473
1474 for &dist in distances.iter() {
1476 assert!(dist >= 0.0);
1477 assert!(dist.is_finite());
1478 }
1479 }
1480
1481 #[test]
1482 fn test_simd_knn_search() {
1483 let data_points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [2.0, 2.0]];
1484 let query_points = array![[0.5, 0.5], [1.5, 1.5]];
1485
1486 let (indices, distances) =
1487 simd_knn_search(&query_points.view(), &data_points.view(), 3, "euclidean").unwrap();
1488
1489 assert_eq!(indices.shape(), &[2, 3]);
1490 assert_eq!(distances.shape(), &[2, 3]);
1491
1492 for row in distances.outer_iter() {
1494 for i in 1..row.len() {
1495 assert!(row[i] >= row[i - 1]);
1496 }
1497 }
1498
1499 for &idx in indices.iter() {
1501 assert!(idx < data_points.nrows());
1502 }
1503 }
1504
1505 #[test]
1506 fn test_linear_to_condensed_indices() {
1507 let n = 4;
1509 let expected_pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)];
1510
1511 for (linear_idx, expected) in expected_pairs.iter().enumerate() {
1512 let result = linear_to_condensed_indices(linear_idx, n);
1513 assert_eq!(result, *expected);
1514 }
1515 }
1516
1517 #[test]
1518 fn test_simd_chebyshev_distance() {
1519 let a = vec![1.0, 2.0, 3.0];
1520 let b = vec![4.0, 5.0, 1.0];
1521
1522 let dist = simd_chebyshev_distance(&a, &b).unwrap();
1523
1524 assert_relative_eq!(dist, 3.0, epsilon = 1e-10);
1526 }
1527
1528 #[test]
1529 fn test_different_metrics() {
1530 let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]];
1531
1532 let metrics = ["euclidean", "manhattan", "sqeuclidean", "chebyshev"];
1533
1534 for metric in &metrics {
1535 let distances = parallel_pdist(&points.view(), metric).unwrap();
1536 assert_eq!(distances.len(), 3); for &dist in distances.iter() {
1539 assert!(dist >= 0.0);
1540 assert!(dist.is_finite());
1541 }
1542 }
1543 }
1544
1545 #[test]
1546 fn test_error_handling() {
1547 let a = vec![1.0, 2.0];
1548 let b = vec![1.0, 2.0, 3.0]; let result = simd_euclidean_distance(&a, &b);
1551 assert!(result.is_err());
1552
1553 let result = simd_manhattan_distance(&a, &b);
1554 assert!(result.is_err());
1555 }
1556
1557 #[test]
1558 fn test_large_dimension_vectors() {
1559 let a = vec![0.0, 1.0, 2.0];
1561 let b = vec![1.0, 2.0, 3.0];
1562
1563 let simd_dist = simd_euclidean_distance(&a, &b).unwrap();
1564 let scalar_dist = crate::distance::euclidean(&a, &b);
1565
1566 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1568
1569 let dim = 1000; let a: Vec<f64> = (0..dim).map(|i| i as f64).collect();
1572 let b: Vec<f64> = (0..dim).map(|i| (i + 1) as f64).collect();
1573
1574 let simd_dist = simd_euclidean_distance(&a, &b).unwrap();
1575 let scalar_dist = crate::distance::euclidean(&a, &b);
1576
1577 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1579 }
1580
1581 #[test]
1582 #[ignore]
1583 fn test_hardware_optimized_distances() {
1584 use super::hardware_specific_simd::HardwareOptimizedDistances;
1585
1586 let optimizer = HardwareOptimizedDistances::new();
1587
1588 let a = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
1589 let b = array![2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
1590
1591 let optimized_dist = optimizer
1592 .euclidean_distance_optimized(&a.view(), &b.view())
1593 .unwrap();
1594 let scalar_dist = crate::distance::euclidean(a.as_slice().unwrap(), b.as_slice().unwrap());
1595
1596 assert_relative_eq!(optimized_dist, scalar_dist, epsilon = 1e-10);
1597 }
1598
1599 #[test]
1600 #[ignore]
1601 fn test_hardware_optimized_batch_matrix() {
1602 let points = array![
1603 [0.0, 0.0],
1604 [1.0, 0.0],
1605 [0.0, 1.0],
1606 [1.0, 1.0],
1607 [2.0, 2.0],
1608 [3.0, 3.0],
1609 [4.0, 4.0],
1610 [5.0, 5.0]
1611 ];
1612
1613 let optimizer = HardwareOptimizedDistances::new();
1614 let result = optimizer.batch_distance_matrix_optimized(&points.view());
1615
1616 assert!(result.is_ok());
1617 let matrix = result.unwrap();
1618 assert_eq!(matrix.dim(), (8, 8));
1619
1620 for i in 0..8 {
1622 assert_relative_eq!(matrix[[i, i]], 0.0, epsilon = 1e-10);
1623 }
1624
1625 for i in 0..8 {
1627 for j in 0..8 {
1628 assert_relative_eq!(matrix[[i, j]], matrix[[j, i]], epsilon = 1e-10);
1629 }
1630 }
1631 }
1632
1633 #[test]
1634 #[ignore]
1635 fn test_hardware_optimized_knn() {
1636 let data_points = array![
1637 [0.0, 0.0],
1638 [1.0, 0.0],
1639 [0.0, 1.0],
1640 [1.0, 1.0],
1641 [2.0, 2.0],
1642 [3.0, 3.0],
1643 [4.0, 4.0],
1644 [5.0, 5.0]
1645 ];
1646 let query_points = array![[0.5, 0.5], [2.5, 2.5]];
1647
1648 let optimizer = HardwareOptimizedDistances::new();
1649 let result = optimizer.knn_search_vectorized(&query_points.view(), &data_points.view(), 3);
1650
1651 assert!(result.is_ok());
1652 let (indices, distances) = result.unwrap();
1653
1654 assert_eq!(indices.dim(), (2, 3));
1655 assert_eq!(distances.dim(), (2, 3));
1656
1657 for row in distances.outer_iter() {
1659 for i in 1..row.len() {
1660 assert!(row[i] >= row[i - 1]);
1661 }
1662 }
1663 }
1664
1665 #[test]
1666 #[ignore]
1667 fn test_mixed_precision_adaptive() {
1668 use super::mixed_precision_simd::adaptive_precision_distance_matrix;
1669
1670 let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
1672
1673 let result = adaptive_precision_distance_matrix(&points.view(), 1e6);
1674 assert!(result.is_ok());
1675
1676 let matrix = result.unwrap();
1677 assert_eq!(matrix.dim(), (4, 4));
1678
1679 for i in 0..4 {
1681 assert_relative_eq!(matrix[[i, i]], 0.0, epsilon = 1e-6);
1682 }
1683 }
1684
1685 #[test]
1686 fn test_simd_capabilities_reporting() {
1687 let optimizer = HardwareOptimizedDistances::new();
1688
1689 optimizer.report_capabilities();
1691
1692 let block_size = optimizer.optimal_simd_block_size();
1693 assert!((2..=8).contains(&block_size));
1694 }
1695}