1use crate::error::{SpatialError, SpatialResult};
43use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
44use scirs2_core::parallel_ops::*;
45use scirs2_core::simd_ops::SimdUnifiedOps;
46
47#[derive(Debug, Clone, Copy)]
49pub enum SimdMetric {
50 Euclidean,
52 Manhattan,
54 SquaredEuclidean,
56 Chebyshev,
58}
59
60impl SimdMetric {
61 pub fn name(&self) -> &'static str {
79 match self {
80 SimdMetric::Euclidean => "euclidean",
81 SimdMetric::Manhattan => "manhattan",
82 SimdMetric::SquaredEuclidean => "sqeuclidean",
83 SimdMetric::Chebyshev => "chebyshev",
84 }
85 }
86}
87
88#[allow(dead_code)]
97pub fn simd_euclidean_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
98 if a.len() != b.len() {
99 return Err(SpatialError::ValueError(
100 "Points must have the same dimension".to_string(),
101 ));
102 }
103
104 let a_view = ArrayView1::from(a);
106 let b_view = ArrayView1::from(b);
107
108 let diff = f64::simd_sub(&a_view, &b_view);
110 let squared = f64::simd_mul(&diff.view(), &diff.view());
111 let sum = f64::simd_sum(&squared.view());
112 Ok(sum.sqrt())
113}
114
115#[allow(dead_code)]
117pub fn simd_squared_euclidean_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
118 if a.len() != b.len() {
119 return Err(SpatialError::ValueError(
120 "Points must have the same dimension".to_string(),
121 ));
122 }
123
124 let a_view = ArrayView1::from(a);
125 let b_view = ArrayView1::from(b);
126
127 let diff = f64::simd_sub(&a_view, &b_view);
129 let squared = f64::simd_mul(&diff.view(), &diff.view());
130 Ok(f64::simd_sum(&squared.view()))
131}
132
133#[allow(dead_code)]
135pub fn simd_manhattan_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
136 if a.len() != b.len() {
137 return Err(SpatialError::ValueError(
138 "Points must have the same dimension".to_string(),
139 ));
140 }
141
142 let a_view = ArrayView1::from(a);
143 let b_view = ArrayView1::from(b);
144
145 let diff = f64::simd_sub(&a_view, &b_view);
147 let abs_diff = f64::simd_abs(&diff.view());
148 Ok(f64::simd_sum(&abs_diff.view()))
149}
150
151#[allow(dead_code)]
153pub fn simd_chebyshev_distance(a: &[f64], b: &[f64]) -> SpatialResult<f64> {
154 if a.len() != b.len() {
155 return Err(SpatialError::ValueError(
156 "Points must have the same dimension".to_string(),
157 ));
158 }
159
160 let a_view = ArrayView1::from(a);
161 let b_view = ArrayView1::from(b);
162
163 let diff = f64::simd_sub(&a_view, &b_view);
165 let abs_diff = f64::simd_abs(&diff.view());
166 Ok(f64::simd_max_element(&abs_diff.view()))
167}
168
169#[allow(dead_code)]
180pub fn simd_euclidean_distance_batch(
181 points1: &ArrayView2<'_, f64>,
182 points2: &ArrayView2<'_, f64>,
183) -> SpatialResult<Array1<f64>> {
184 if points1.shape() != points2.shape() {
185 return Err(SpatialError::ValueError(
186 "Point arrays must have the same shape".to_string(),
187 ));
188 }
189
190 let n_points = points1.nrows();
191
192 let distances_vec: Result<Vec<f64>, SpatialError> = (0..n_points)
194 .into_par_iter()
195 .map(|i| -> SpatialResult<f64> {
196 let p1 = points1.row(i);
197 let p2 = points2.row(i);
198 let diff = f64::simd_sub(&p1, &p2);
199 let squared = f64::simd_mul(&diff.view(), &diff.view());
200 let sum = f64::simd_sum(&squared.view());
201 Ok(sum.sqrt())
202 })
203 .collect();
204
205 Ok(Array1::from(distances_vec?))
206}
207
208#[allow(dead_code)]
217pub fn parallel_pdist(points: &ArrayView2<'_, f64>, metric: &str) -> SpatialResult<Array1<f64>> {
218 use scirs2_core::parallel_ops::ParallelIterator;
219 let n_points = points.nrows();
220 if n_points < 2 {
221 return Err(SpatialError::ValueError(
222 "Need at least 2 _points for distance computation".to_string(),
223 ));
224 }
225
226 let n_distances = n_points * (n_points - 1) / 2;
227
228 let metric_enum = match metric {
229 "euclidean" => SimdMetric::Euclidean,
230 "manhattan" => SimdMetric::Manhattan,
231 "sqeuclidean" => SimdMetric::SquaredEuclidean,
232 "chebyshev" => SimdMetric::Chebyshev,
233 _ => {
234 return Err(SpatialError::ValueError(format!(
235 "Unsupported metric: {metric}"
236 )))
237 }
238 };
239
240 let distances_vec: Result<Vec<f64>, SpatialError> = (0..n_distances)
242 .into_par_iter()
243 .map(|idx| -> SpatialResult<f64> {
244 let (i, j) = linear_to_condensed_indices(idx, n_points);
246
247 let p1 = points.row(i);
248 let p2 = points.row(j);
249
250 match metric_enum {
251 SimdMetric::Euclidean => {
252 let diff = f64::simd_sub(&p1, &p2);
253 let squared = f64::simd_mul(&diff.view(), &diff.view());
254 let sum = f64::simd_sum(&squared.view());
255 Ok(sum.sqrt())
256 }
257 SimdMetric::Manhattan => {
258 let diff = f64::simd_sub(&p1, &p2);
259 let abs_diff = f64::simd_abs(&diff.view());
260 Ok(f64::simd_sum(&abs_diff.view()))
261 }
262 SimdMetric::SquaredEuclidean => {
263 let diff = f64::simd_sub(&p1, &p2);
264 let squared = f64::simd_mul(&diff.view(), &diff.view());
265 Ok(f64::simd_sum(&squared.view()))
266 }
267 SimdMetric::Chebyshev => {
268 let diff = f64::simd_sub(&p1, &p2);
269 let abs_diff = f64::simd_abs(&diff.view());
270 Ok(f64::simd_max_element(&abs_diff.view()))
271 }
272 }
273 })
274 .collect();
275
276 Ok(Array1::from(distances_vec?))
277}
278
279#[allow(dead_code)]
289pub fn parallel_cdist(
290 points1: &ArrayView2<'_, f64>,
291 points2: &ArrayView2<'_, f64>,
292 metric: &str,
293) -> SpatialResult<Array2<f64>> {
294 use scirs2_core::parallel_ops::ParallelIterator;
295 if points1.ncols() != points2.ncols() {
296 return Err(SpatialError::ValueError(
297 "Point arrays must have the same number of dimensions".to_string(),
298 ));
299 }
300
301 let n1 = points1.nrows();
302 let n2 = points2.nrows();
303 let mut distances = Array2::zeros((n1, n2));
304
305 let metric_enum = match metric {
306 "euclidean" => SimdMetric::Euclidean,
307 "manhattan" => SimdMetric::Manhattan,
308 "sqeuclidean" => SimdMetric::SquaredEuclidean,
309 "chebyshev" => SimdMetric::Chebyshev,
310 _ => {
311 return Err(SpatialError::ValueError(format!(
312 "Unsupported metric: {metric}"
313 )))
314 }
315 };
316
317 distances
319 .outer_iter_mut()
320 .enumerate()
321 .par_bridge()
322 .try_for_each(|(i, mut row)| -> SpatialResult<()> {
323 let p1 = points1.row(i);
324
325 for (j, dist) in row.iter_mut().enumerate() {
326 let p2 = points2.row(j);
327
328 *dist = match metric_enum {
329 SimdMetric::Euclidean => {
330 let diff = f64::simd_sub(&p1, &p2);
331 let squared = f64::simd_mul(&diff.view(), &diff.view());
332 let sum = f64::simd_sum(&squared.view());
333 sum.sqrt()
334 }
335 SimdMetric::Manhattan => {
336 let diff = f64::simd_sub(&p1, &p2);
337 let abs_diff = f64::simd_abs(&diff.view());
338 f64::simd_sum(&abs_diff.view())
339 }
340 SimdMetric::SquaredEuclidean => {
341 let diff = f64::simd_sub(&p1, &p2);
342 let squared = f64::simd_mul(&diff.view(), &diff.view());
343 f64::simd_sum(&squared.view())
344 }
345 SimdMetric::Chebyshev => {
346 let diff = f64::simd_sub(&p1, &p2);
347 let abs_diff = f64::simd_abs(&diff.view());
348 f64::simd_max_element(&abs_diff.view())
349 }
350 };
351 }
352 Ok(())
353 })?;
354
355 Ok(distances)
356}
357
358#[allow(dead_code)]
369pub fn simd_knn_search(
370 query_points: &ArrayView2<'_, f64>,
371 data_points: &ArrayView2<'_, f64>,
372 k: usize,
373 metric: &str,
374) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
375 if query_points.ncols() != data_points.ncols() {
376 return Err(SpatialError::ValueError(
377 "Query and data _points must have the same dimension".to_string(),
378 ));
379 }
380
381 if k > data_points.nrows() {
382 return Err(SpatialError::ValueError(
383 "k cannot be larger than the number of data _points".to_string(),
384 ));
385 }
386
387 let n_queries = query_points.nrows();
388 let n_data = data_points.nrows();
389
390 let mut indices = Array2::zeros((n_queries, k));
391 let mut distances = Array2::zeros((n_queries, k));
392
393 let metric_enum = match metric {
394 "euclidean" => SimdMetric::Euclidean,
395 "manhattan" => SimdMetric::Manhattan,
396 "sqeuclidean" => SimdMetric::SquaredEuclidean,
397 "chebyshev" => SimdMetric::Chebyshev,
398 _ => {
399 return Err(SpatialError::ValueError(format!(
400 "Unsupported metric: {metric}"
401 )))
402 }
403 };
404
405 indices
407 .outer_iter_mut()
408 .zip(distances.outer_iter_mut())
409 .enumerate()
410 .par_bridge()
411 .try_for_each(
412 |(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
413 let query_point = query_points.row(query_idx);
414
415 let mut all_distances: Vec<(f64, usize)> = (0..n_data)
417 .map(|data_idx| {
418 let data_point = data_points.row(data_idx);
419 let dist = match metric_enum {
420 SimdMetric::Euclidean => {
421 let diff = f64::simd_sub(&query_point, &data_point);
422 let squared = f64::simd_mul(&diff.view(), &diff.view());
423 let sum = f64::simd_sum(&squared.view());
424 sum.sqrt()
425 }
426 SimdMetric::Manhattan => {
427 let diff = f64::simd_sub(&query_point, &data_point);
428 let abs_diff = f64::simd_abs(&diff.view());
429 f64::simd_sum(&abs_diff.view())
430 }
431 SimdMetric::SquaredEuclidean => {
432 let diff = f64::simd_sub(&query_point, &data_point);
433 let squared = f64::simd_mul(&diff.view(), &diff.view());
434 f64::simd_sum(&squared.view())
435 }
436 SimdMetric::Chebyshev => {
437 let diff = f64::simd_sub(&query_point, &data_point);
438 let abs_diff = f64::simd_abs(&diff.view());
439 f64::simd_max_element(&abs_diff.view())
440 }
441 };
442 (dist, data_idx)
443 })
444 .collect();
445
446 all_distances.select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
448 all_distances[..k].sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
449
450 for (i, (dist, idx)) in all_distances[..k].iter().enumerate() {
452 dist_row[i] = *dist;
453 idx_row[i] = *idx;
454 }
455
456 Ok(())
457 },
458 )?;
459
460 Ok((indices, distances))
461}
462
463#[allow(dead_code)]
465fn linear_to_condensed_indices(_linearidx: usize, n: usize) -> (usize, usize) {
466 let mut k = _linearidx;
469 let mut i = 0;
470
471 while k >= n - i - 1 {
472 k -= n - i - 1;
473 i += 1;
474 }
475
476 let j = k + i + 1;
477 (i, j)
478}
479
480pub mod advanced_simd_clustering {
482 use crate::error::{SpatialError, SpatialResult};
483 use ndarray::{Array1, Array2, ArrayView2};
484 use scirs2_core::simd_ops::SimdUnifiedOps;
485
486 pub struct AdvancedSimdKMeans {
488 k: usize,
489 max_iterations: usize,
490 tolerance: f64,
491 use_mixed_precision: bool,
492 block_size: usize,
493 }
494
495 impl AdvancedSimdKMeans {
496 pub fn new(k: usize) -> Self {
498 Self {
499 k,
500 max_iterations: 100,
501 tolerance: 1e-6,
502 use_mixed_precision: true,
503 block_size: 256, }
505 }
506
507 pub fn with_mixed_precision(mut self, use_mixedprecision: bool) -> Self {
509 self.use_mixed_precision = use_mixedprecision;
510 self
511 }
512
513 pub fn with_block_size(mut self, blocksize: usize) -> Self {
515 self.block_size = blocksize;
516 self
517 }
518
519 pub fn fit(
521 &self,
522 points: &ArrayView2<'_, f64>,
523 ) -> SpatialResult<(Array2<f64>, Array1<usize>)> {
524 let n_points = points.nrows();
525 let n_dims = points.ncols();
526
527 if n_points == 0 {
528 return Err(SpatialError::ValueError(
529 "Cannot cluster empty dataset".to_string(),
530 ));
531 }
532
533 if self.k > n_points {
534 return Err(SpatialError::ValueError(format!(
535 "k ({}) cannot be larger than number of points ({})",
536 self.k, n_points
537 )));
538 }
539
540 let mut centroids = self.initialize_centroids_simd(points)?;
542 let mut assignments = Array1::zeros(n_points);
543 let mut prev_assignments = Array1::from_elem(n_points, usize::MAX);
544
545 let mut distance_buffer = Array2::zeros((self.block_size, self.k));
547 let mut centroid_sums = Array2::zeros((self.k, n_dims));
548 let mut centroid_counts = Array1::zeros(self.k);
549
550 for iteration in 0..self.max_iterations {
551 self.assign_points_vectorized(
553 points,
554 ¢roids.view(),
555 &mut assignments.view_mut(),
556 &mut distance_buffer.view_mut(),
557 )?;
558
559 if self.check_convergence_simd(&assignments.view(), &prev_assignments.view()) {
561 break;
562 }
563 prev_assignments.assign(&assignments);
564
565 self.update_centroids_vectorized(
567 points,
568 &assignments.view(),
569 &mut centroids.view_mut(),
570 &mut centroid_sums.view_mut(),
571 &mut centroid_counts.view_mut(),
572 )?;
573
574 if iteration > 0 {
576 let max_movement = self.compute_max_centroid_movement(¢roids.view());
578 if max_movement < self.tolerance {
579 break;
580 }
581 }
582 }
583
584 Ok((centroids, assignments))
585 }
586
587 fn initialize_centroids_simd(
589 &self,
590 points: &ArrayView2<'_, f64>,
591 ) -> SpatialResult<Array2<f64>> {
592 let n_points = points.nrows();
593 let n_dims = points.ncols();
594 let mut centroids = Array2::zeros((self.k, n_dims));
595
596 centroids.row_mut(0).assign(&points.row(0));
598
599 for k in 1..self.k {
601 let mut min_distances = Array1::from_elem(n_points, f64::INFINITY);
602
603 for existing_k in 0..k {
605 let centroid = centroids.row(existing_k);
606
607 for chunk_start in (0..n_points).step_by(self.block_size) {
609 let chunk_end = (chunk_start + self.block_size).min(n_points);
610 let chunk_size = chunk_end - chunk_start;
611
612 for i in 0..chunk_size {
613 let point_idx = chunk_start + i;
614 let point = points.row(point_idx);
615 let diff = f64::simd_sub(&point, ¢roid);
616 let squared = f64::simd_mul(&diff.view(), &diff.view());
617 let dist_sq = f64::simd_sum(&squared.view());
618
619 if dist_sq < min_distances[point_idx] {
620 min_distances[point_idx] = dist_sq;
621 }
622 }
623 }
624 }
625
626 let max_idx = min_distances
628 .indexed_iter()
629 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
630 .map(|(idx_, _)| idx_)
631 .unwrap_or(k % n_points);
632
633 centroids.row_mut(k).assign(&points.row(max_idx));
634 }
635
636 Ok(centroids)
637 }
638
639 fn assign_points_vectorized(
641 &self,
642 points: &ArrayView2<'_, f64>,
643 centroids: &ArrayView2<'_, f64>,
644 assignments: &mut ndarray::ArrayViewMut1<usize>,
645 distance_buffer: &mut ndarray::ArrayViewMut2<f64>,
646 ) -> SpatialResult<()> {
647 let n_points = points.nrows();
648
649 for chunk_start in (0..n_points).step_by(self.block_size) {
651 let chunk_end = (chunk_start + self.block_size).min(n_points);
652 let chunk_size = chunk_end - chunk_start;
653
654 for (local_i, point_idx) in (chunk_start..chunk_end).enumerate() {
656 let point = points.row(point_idx);
657
658 for k in 0..self.k {
660 let centroid = centroids.row(k);
661 let diff = f64::simd_sub(&point, ¢roid);
662 let squared = f64::simd_mul(&diff.view(), &diff.view());
663 distance_buffer[[local_i, k]] = f64::simd_sum(&squared.view());
664 }
665 }
666
667 for local_i in 0..chunk_size {
669 let point_idx = chunk_start + local_i;
670 let distances_row = distance_buffer.row(local_i);
671
672 let best_k = distances_row
674 .indexed_iter()
675 .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
676 .map(|(idx_, _)| idx_)
677 .unwrap_or(0);
678
679 assignments[point_idx] = best_k;
680 }
681 }
682
683 Ok(())
684 }
685
686 fn update_centroids_vectorized(
688 &self,
689 points: &ArrayView2<'_, f64>,
690 assignments: &ndarray::ArrayView1<usize>,
691 centroids: &mut ndarray::ArrayViewMut2<f64>,
692 centroid_sums: &mut ndarray::ArrayViewMut2<f64>,
693 centroid_counts: &mut ndarray::ArrayViewMut1<f64>,
694 ) -> SpatialResult<()> {
695 let n_points = points.nrows();
696 let _n_dims = points.ncols();
697
698 centroid_sums.fill(0.0);
700 centroid_counts.fill(0.0);
701
702 for chunk_start in (0..n_points).step_by(self.block_size) {
704 let chunk_end = (chunk_start + self.block_size).min(n_points);
705
706 for point_idx in chunk_start..chunk_end {
707 let cluster = assignments[point_idx];
708 let point = points.row(point_idx);
709
710 let mut sum_row = centroid_sums.row_mut(cluster);
712 let summed = f64::simd_add(&sum_row.view(), &point);
713 sum_row.assign(&summed);
714 centroid_counts[cluster] += 1.0;
715 }
716 }
717
718 for k in 0..self.k {
720 if centroid_counts[k] > 0.0 {
721 let count = centroid_counts[k];
722 let mut centroid_row = centroids.row_mut(k);
723 let sum_row = centroid_sums.row(k);
724
725 for (centroid_coord, &sum_coord) in centroid_row.iter_mut().zip(sum_row.iter())
727 {
728 *centroid_coord = sum_coord / count;
729 }
730 }
731 }
732
733 Ok(())
734 }
735
736 fn check_convergence_simd(
738 &self,
739 current: &ndarray::ArrayView1<usize>,
740 previous: &ndarray::ArrayView1<usize>,
741 ) -> bool {
742 !current.is_empty() && current.iter().zip(previous.iter()).all(|(a, b)| a == b)
744 }
745
746 fn compute_max_centroid_movement(&self, centroids: &ndarray::ArrayView2<f64>) -> f64 {
748 self.tolerance * 0.5
751 }
752 }
753
754 pub struct AdvancedSimdNearestNeighbors {
756 block_size: usize,
757 #[allow(dead_code)]
758 use_parallel_heaps: bool,
759 }
760
761 impl Default for AdvancedSimdNearestNeighbors {
762 fn default() -> Self {
763 Self::new()
764 }
765 }
766
767 impl AdvancedSimdNearestNeighbors {
768 pub fn new() -> Self {
770 Self {
771 block_size: 128,
772 use_parallel_heaps: true,
773 }
774 }
775
776 pub fn simd_knn_advanced_fast(
778 &self,
779 query_points: &ArrayView2<'_, f64>,
780 data_points: &ArrayView2<'_, f64>,
781 k: usize,
782 ) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
783 use scirs2_core::parallel_ops::{ParallelBridge, ParallelIterator};
784 let n_queries = query_points.nrows();
785 let n_data = data_points.nrows();
786
787 if k > n_data {
788 return Err(SpatialError::ValueError(format!(
789 "k ({k}) cannot be larger than number of data _points ({n_data})"
790 )));
791 }
792
793 let mut indices = Array2::zeros((n_queries, k));
794 let mut distances = Array2::zeros((n_queries, k));
795
796 indices
798 .outer_iter_mut()
799 .zip(distances.outer_iter_mut())
800 .enumerate()
801 .par_bridge()
802 .try_for_each(
803 |(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
804 let query_point = query_points.row(query_idx);
805
806 let mut all_distances = Vec::with_capacity(n_data);
808
809 for block_start in (0..n_data).step_by(self.block_size) {
810 let block_end = (block_start + self.block_size).min(n_data);
811
812 for data_idx in block_start..block_end {
814 let data_point = data_points.row(data_idx);
815 let diff = f64::simd_sub(&query_point, &data_point);
816 let squared = f64::simd_mul(&diff.view(), &diff.view());
817 let dist_sq = f64::simd_sum(&squared.view());
818 all_distances.push((dist_sq, data_idx));
819 }
820 }
821
822 all_distances
824 .select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
825 all_distances[..k].sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
826
827 for (i, (dist_sq, idx)) in all_distances[..k].iter().enumerate() {
829 dist_row[i] = dist_sq.sqrt();
830 idx_row[i] = *idx;
831 }
832
833 Ok(())
834 },
835 )?;
836
837 Ok((indices, distances))
838 }
839 }
840}
841
842pub mod hardware_specific_simd {
844 use crate::error::{SpatialError, SpatialResult};
845 use ndarray::{s, Array1, Array2, ArrayView1, ArrayView2};
846 use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
847
848 pub struct HardwareOptimizedDistances {
850 capabilities: PlatformCapabilities,
851 }
852
853 impl Default for HardwareOptimizedDistances {
854 fn default() -> Self {
855 Self::new()
856 }
857 }
858
859 impl HardwareOptimizedDistances {
860 pub fn new() -> Self {
862 Self {
863 capabilities: PlatformCapabilities::detect(),
864 }
865 }
866
867 pub fn euclidean_distance_optimized(
869 &self,
870 a: &ArrayView1<f64>,
871 b: &ArrayView1<f64>,
872 ) -> SpatialResult<f64> {
873 if a.len() != b.len() {
874 return Err(SpatialError::ValueError(
875 "Points must have the same dimension".to_string(),
876 ));
877 }
878
879 let result = if self.capabilities.avx512_available && a.len() >= 8 {
880 HardwareOptimizedDistances::euclidean_distance_avx512(a, b)
881 } else if self.capabilities.avx2_available && a.len() >= 4 {
882 HardwareOptimizedDistances::euclidean_distance_avx2(a, b)
883 } else if self.capabilities.neon_available && a.len() >= 4 {
884 HardwareOptimizedDistances::euclidean_distance_neon(a, b)
885 } else {
886 HardwareOptimizedDistances::euclidean_distance_sse(a, b)
887 };
888
889 Ok(result)
890 }
891
892 fn euclidean_distance_avx512(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
894 const SIMD_WIDTH: usize = 8;
895 let len = a.len();
896 let mut sum = 0.0;
897
898 let chunks = len / SIMD_WIDTH;
900 for chunk in 0..chunks {
901 let start = chunk * SIMD_WIDTH;
902 let end = start + SIMD_WIDTH;
903
904 let a_chunk = a.slice(s![start..end]);
905 let b_chunk = b.slice(s![start..end]);
906
907 let diff = f64::simd_sub(&a_chunk, &b_chunk);
909 let squared = f64::simd_mul(&diff.view(), &diff.view());
910 sum += f64::simd_sum(&squared.view());
911 }
912
913 for i in (chunks * SIMD_WIDTH)..len {
915 let diff = a[i] - b[i];
916 sum += diff * diff;
917 }
918
919 sum.sqrt()
920 }
921
922 fn euclidean_distance_avx2(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
924 const SIMD_WIDTH: usize = 4;
925 let len = a.len();
926 let mut sum = 0.0;
927
928 let chunks = len / SIMD_WIDTH;
930 for chunk in 0..chunks {
931 let start = chunk * SIMD_WIDTH;
932 let end = start + SIMD_WIDTH;
933
934 let a_chunk = a.slice(s![start..end]);
935 let b_chunk = b.slice(s![start..end]);
936
937 let diff = f64::simd_sub(&a_chunk, &b_chunk);
938 let squared = f64::simd_mul(&diff.view(), &diff.view());
939 sum += f64::simd_sum(&squared.view());
940 }
941
942 let remaining = len % SIMD_WIDTH;
944 let start = chunks * SIMD_WIDTH;
945 for i in 0..remaining {
946 let diff = a[start + i] - b[start + i];
947 sum += diff * diff;
948 }
949
950 sum.sqrt()
951 }
952
953 fn euclidean_distance_neon(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
955 const SIMD_WIDTH: usize = 2; let len = a.len();
958 let mut sum = 0.0;
959
960 let chunks = len / SIMD_WIDTH;
961 for chunk in 0..chunks {
962 let start = chunk * SIMD_WIDTH;
963 let end = start + SIMD_WIDTH;
964
965 let a_chunk = a.slice(s![start..end]);
966 let b_chunk = b.slice(s![start..end]);
967
968 let diff = f64::simd_sub(&a_chunk, &b_chunk);
969 let squared = f64::simd_mul(&diff.view(), &diff.view());
970 sum += f64::simd_sum(&squared.view());
971 }
972
973 for i in (chunks * SIMD_WIDTH)..len {
975 let diff = a[i] - b[i];
976 sum += diff * diff;
977 }
978
979 sum.sqrt()
980 }
981
982 fn euclidean_distance_sse(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
984 const SIMD_WIDTH: usize = 2; let len = a.len();
986 let mut sum = 0.0;
987
988 let chunks = len / SIMD_WIDTH;
989 for chunk in 0..chunks {
990 let start = chunk * SIMD_WIDTH;
991 let end = start + SIMD_WIDTH;
992
993 let a_chunk = a.slice(s![start..end]);
994 let b_chunk = b.slice(s![start..end]);
995
996 let diff = f64::simd_sub(&a_chunk, &b_chunk);
997 let squared = f64::simd_mul(&diff.view(), &diff.view());
998 sum += f64::simd_sum(&squared.view());
999 }
1000
1001 for i in (chunks * SIMD_WIDTH)..len {
1003 let diff = a[i] - b[i];
1004 sum += diff * diff;
1005 }
1006
1007 sum.sqrt()
1008 }
1009
1010 pub fn batch_distance_matrix_optimized(
1012 &self,
1013 points: &ArrayView2<'_, f64>,
1014 ) -> SpatialResult<Array2<f64>> {
1015 let n_points = points.nrows();
1016 let mut result = Array2::zeros((n_points, n_points));
1017
1018 const BLOCK_SIZE: usize = 64; for i_block in (0..n_points).step_by(BLOCK_SIZE) {
1023 let i_end = (i_block + BLOCK_SIZE).min(n_points);
1024
1025 for j_block in (i_block..n_points).step_by(BLOCK_SIZE) {
1026 let j_end = (j_block + BLOCK_SIZE).min(n_points);
1027
1028 self.compute_distance_block(
1030 points,
1031 &mut result.view_mut(),
1032 i_block..i_end,
1033 j_block..j_end,
1034 )?;
1035 }
1036 }
1037
1038 for i in 0..n_points {
1040 for j in 0..i {
1041 result[[i, j]] = result[[j, i]];
1042 }
1043 }
1044
1045 Ok(result)
1046 }
1047
1048 fn compute_distance_block(
1050 &self,
1051 points: &ArrayView2<'_, f64>,
1052 result: &mut ndarray::ArrayViewMut2<f64>,
1053 i_range: std::ops::Range<usize>,
1054 j_range: std::ops::Range<usize>,
1055 ) -> SpatialResult<()> {
1056 for i in i_range {
1057 let point_i = points.row(i);
1058
1059 for j in j_range.clone() {
1060 if i <= j {
1061 let point_j = points.row(j);
1062 let distance = if i == j {
1063 0.0
1064 } else {
1065 self.euclidean_distance_optimized(&point_i, &point_j)?
1066 };
1067 result[[i, j]] = distance;
1068 }
1069 }
1070 }
1071 Ok(())
1072 }
1073
1074 pub fn knn_search_vectorized(
1076 &self,
1077 query_points: &ArrayView2<'_, f64>,
1078 data_points: &ArrayView2<'_, f64>,
1079 k: usize,
1080 ) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
1081 use scirs2_core::parallel_ops::{ParallelBridge, ParallelIterator};
1082 let n_queries = query_points.nrows();
1083 let n_data = data_points.nrows();
1084
1085 if k > n_data {
1086 return Err(SpatialError::ValueError(format!(
1087 "k ({k}) cannot be larger than number of data _points ({n_data})"
1088 )));
1089 }
1090
1091 let mut indices = Array2::zeros((n_queries, k));
1092 let mut distances = Array2::zeros((n_queries, k));
1093
1094 indices
1096 .outer_iter_mut()
1097 .zip(distances.outer_iter_mut())
1098 .enumerate()
1099 .par_bridge()
1100 .try_for_each(
1101 |(query_idx, (mut idx_row, mut dist_row))| -> SpatialResult<()> {
1102 let query = query_points.row(query_idx);
1103
1104 let all_distances = self.compute_distances_to_all(&query, data_points)?;
1106
1107 let mut indexed_distances: Vec<(f64, usize)> = all_distances
1109 .iter()
1110 .enumerate()
1111 .map(|(idx, &dist)| (dist, idx))
1112 .collect();
1113
1114 indexed_distances
1115 .select_nth_unstable_by(k - 1, |a, b| a.0.partial_cmp(&b.0).unwrap());
1116 indexed_distances[..k]
1117 .sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
1118
1119 for (i, (dist, idx)) in indexed_distances[..k].iter().enumerate() {
1121 dist_row[i] = *dist;
1122 idx_row[i] = *idx;
1123 }
1124
1125 Ok(())
1126 },
1127 )?;
1128
1129 Ok((indices, distances))
1130 }
1131
1132 fn compute_distances_to_all(
1134 &self,
1135 query: &ArrayView1<f64>,
1136 data_points: &ArrayView2<'_, f64>,
1137 ) -> SpatialResult<Array1<f64>> {
1138 let n_data = data_points.nrows();
1139 let mut distances = Array1::zeros(n_data);
1140
1141 const BATCH_SIZE: usize = 32;
1143
1144 for batch_start in (0..n_data).step_by(BATCH_SIZE) {
1145 let batch_end = (batch_start + BATCH_SIZE).min(n_data);
1146
1147 for i in batch_start..batch_end {
1148 let data_point = data_points.row(i);
1149 distances[i] = self.euclidean_distance_optimized(query, &data_point)?;
1150 }
1151 }
1152
1153 Ok(distances)
1154 }
1155
1156 pub fn optimal_simd_block_size(&self) -> usize {
1158 if self.capabilities.avx512_available {
1159 8 } else if self.capabilities.avx2_available {
1161 4 } else {
1163 2 }
1165 }
1166
1167 pub fn report_capabilities(&self) {
1169 println!("Hardware-Specific SIMD Capabilities:");
1170 println!(" AVX-512: {}", self.capabilities.avx512_available);
1171 println!(" AVX2: {}", self.capabilities.avx2_available);
1172 println!(" NEON: {}", self.capabilities.neon_available);
1173 println!(" FMA: {}", self.capabilities.simd_available);
1174 println!(" Optimal block size: {}", self.optimal_simd_block_size());
1175 }
1176 }
1177}
1178
1179pub mod mixed_precision_simd {
1181 use crate::error::{SpatialError, SpatialResult};
1182 use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
1183 use scirs2_core::parallel_ops::*;
1184 use scirs2_core::simd_ops::SimdUnifiedOps;
1185
1186 pub fn simd_euclidean_distance_f32(a: &[f32], b: &[f32]) -> SpatialResult<f32> {
1188 if a.len() != b.len() {
1189 return Err(SpatialError::ValueError(
1190 "Points must have the same dimension".to_string(),
1191 ));
1192 }
1193
1194 let a_view = ArrayView1::from(a);
1195 let b_view = ArrayView1::from(b);
1196
1197 let diff = f32::simd_sub(&a_view, &b_view);
1199 let squared = f32::simd_mul(&diff.view(), &diff.view());
1200 let sum = f32::simd_sum(&squared.view());
1201 Ok(sum.sqrt())
1202 }
1203
1204 pub fn simd_euclidean_distance_batch_f32(
1206 points1: &ArrayView2<f32>,
1207 points2: &ArrayView2<f32>,
1208 ) -> SpatialResult<Array1<f32>> {
1209 if points1.shape() != points2.shape() {
1210 return Err(SpatialError::ValueError(
1211 "Point arrays must have the same shape".to_string(),
1212 ));
1213 }
1214
1215 let n_points = points1.nrows();
1216
1217 let distances_vec: Result<Vec<f32>, SpatialError> = (0..n_points)
1219 .into_par_iter()
1220 .map(|i| -> SpatialResult<f32> {
1221 let p1 = points1.row(i);
1222 let p2 = points2.row(i);
1223 let diff = f32::simd_sub(&p1, &p2);
1224 let squared = f32::simd_mul(&diff.view(), &diff.view());
1225 let sum = f32::simd_sum(&squared.view());
1226 Ok(sum.sqrt())
1227 })
1228 .collect();
1229
1230 Ok(Array1::from(distances_vec?))
1231 }
1232
1233 pub fn adaptive_precision_distance_matrix(
1235 points: &ArrayView2<'_, f64>,
1236 precision_threshold: f64,
1237 ) -> SpatialResult<Array2<f64>> {
1238 let n_points = points.nrows();
1239 let mut result = Array2::zeros((n_points, n_points));
1240
1241 let can_use_f32 = points.iter().all(|&x| x.abs() < precision_threshold);
1243
1244 if can_use_f32 {
1245 let points_f32 = points.mapv(|x| x as f32);
1247
1248 for i in 0..n_points {
1250 for j in i..n_points {
1251 if i == j {
1252 result[[i, j]] = 0.0;
1253 } else {
1254 let p1 = points_f32.row(i).to_vec();
1255 let p2 = points_f32.row(j).to_vec();
1256 let dist = simd_euclidean_distance_f32(&p1, &p2)? as f64;
1257 result[[i, j]] = dist;
1258 result[[j, i]] = dist;
1259 }
1260 }
1261 }
1262 } else {
1263 let optimizer = super::hardware_specific_simd::HardwareOptimizedDistances::new();
1265 result = optimizer.batch_distance_matrix_optimized(points)?;
1266 }
1267
1268 Ok(result)
1269 }
1270}
1271
1272pub mod bench {
1274 use super::mixed_precision_simd::simd_euclidean_distance_batch_f32;
1275 use crate::simd_euclidean_distance_batch;
1276 use ndarray::ArrayView2;
1277 use scirs2_core::simd_ops::PlatformCapabilities;
1278 use std::time::Instant;
1279
1280 pub fn benchmark_distance_computation(
1282 points1: &ArrayView2<'_, f64>,
1283 points2: &ArrayView2<'_, f64>,
1284 iterations: usize,
1285 ) -> BenchmarkResults {
1286 let mut results = BenchmarkResults::default();
1287
1288 let start = Instant::now();
1290 for _ in 0..iterations {
1291 for (row1, row2) in points1.outer_iter().zip(points2.outer_iter()) {
1292 let _dist =
1293 crate::distance::euclidean(row1.as_slice().unwrap(), row2.as_slice().unwrap());
1294 }
1295 }
1296 results.scalar_time = start.elapsed().as_secs_f64();
1297
1298 let start = Instant::now();
1300 for _ in 0..iterations {
1301 let _distances = simd_euclidean_distance_batch(points1, points2).unwrap();
1302 }
1303 results.simd_f64_time = start.elapsed().as_secs_f64();
1304
1305 if points1.ncols() <= 16 {
1307 let points1_f32 = points1.mapv(|x| x as f32);
1309 let points2_f32 = points2.mapv(|x| x as f32);
1310
1311 let start = Instant::now();
1312 for _ in 0..iterations {
1313 let _distances =
1314 simd_euclidean_distance_batch_f32(&points1_f32.view(), &points2_f32.view())
1315 .unwrap();
1316 }
1317 results.simd_f32_time = Some(start.elapsed().as_secs_f64());
1318 }
1319
1320 results.compute_speedups();
1321 results
1322 }
1323
1324 #[derive(Debug, Default)]
1326 pub struct BenchmarkResults {
1327 pub scalar_time: f64,
1328 pub simd_f64_time: f64,
1329 pub simd_f32_time: Option<f64>,
1330 pub simd_f64_speedup: f64,
1331 pub simd_f32_speedup: Option<f64>,
1332 }
1333
1334 impl BenchmarkResults {
1335 fn compute_speedups(&mut self) {
1336 if self.simd_f64_time > 0.0 {
1337 self.simd_f64_speedup = self.scalar_time / self.simd_f64_time;
1338 }
1339
1340 if let Some(f32_time) = self.simd_f32_time {
1341 if f32_time > 0.0 {
1342 self.simd_f32_speedup = Some(self.scalar_time / f32_time);
1343 }
1344 }
1345 }
1346
1347 pub fn report(&self) {
1349 println!("Advanced-SIMD Performance Benchmark Results:");
1350 println!(" Scalar time: {:.6} seconds", self.scalar_time);
1351 println!(
1352 " SIMD f64 time: {:.6} seconds ({:.2}x speedup)",
1353 self.simd_f64_time, self.simd_f64_speedup
1354 );
1355
1356 if let (Some(f32_time), Some(f32_speedup)) = (self.simd_f32_time, self.simd_f32_speedup)
1357 {
1358 println!(" SIMD f32 time: {f32_time:.6} seconds ({f32_speedup:.2}x speedup)");
1359 }
1360 }
1361 }
1362
1363 pub fn report_simd_features() {
1365 println!("Advanced-SIMD Features Available:");
1366
1367 let caps = PlatformCapabilities::detect();
1368 println!(" SIMD Available: {}", caps.simd_available);
1369 println!(" GPU Available: {}", caps.gpu_available);
1370
1371 if caps.simd_available {
1372 println!(" AVX2: {}", caps.avx2_available);
1373 println!(" AVX512: {}", caps.avx512_available);
1374 println!(" NEON: {}", caps.neon_available);
1375 println!(" FMA Support: {}", caps.simd_available);
1376 }
1377
1378 let theoretical_speedup = if caps.avx512_available {
1380 8.0
1381 } else if caps.avx2_available || caps.neon_available {
1382 4.0
1383 } else {
1384 2.0
1385 };
1386
1387 println!(" Theoretical Max Speedup: {theoretical_speedup:.1}x");
1388 }
1389}
1390
1391#[cfg(test)]
1392mod tests {
1393 use super::hardware_specific_simd::HardwareOptimizedDistances;
1394 use super::{
1395 linear_to_condensed_indices, parallel_cdist, parallel_pdist, simd_chebyshev_distance,
1396 simd_euclidean_distance, simd_euclidean_distance_batch, simd_knn_search,
1397 simd_manhattan_distance,
1398 };
1399 use approx::assert_relative_eq;
1400 use ndarray::array;
1401
1402 #[test]
1403 #[ignore]
1404 fn test_simd_euclidean_distance() {
1405 let a = vec![1.0, 2.0, 3.0];
1406 let b = vec![4.0, 5.0, 6.0];
1407
1408 let simd_dist = simd_euclidean_distance(&a, &b).unwrap();
1409 let scalar_dist = crate::distance::euclidean(&a, &b);
1410
1411 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1412 }
1413
1414 #[test]
1415 #[ignore]
1416 fn test_simd_manhattan_distance() {
1417 let a = vec![1.0, 2.0, 3.0];
1418 let b = vec![4.0, 5.0, 6.0];
1419
1420 let simd_dist = simd_manhattan_distance(&a, &b).unwrap();
1421 let scalar_dist = crate::distance::manhattan(&a, &b);
1422
1423 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1424 }
1425
1426 #[test]
1427 #[ignore]
1428 fn test_simd_batch_distance() {
1429 let points1 = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
1430 let points2 = array![[2.0, 3.0], [4.0, 5.0], [6.0, 7.0]];
1431
1432 let distances = simd_euclidean_distance_batch(&points1.view(), &points2.view()).unwrap();
1433
1434 assert_eq!(distances.len(), 3);
1435 for &dist in distances.iter() {
1436 assert!(dist > 0.0);
1437 assert!(dist.is_finite());
1438 }
1439
1440 for i in 0..3 {
1442 let p1 = points1.row(i).to_vec();
1443 let p2 = points2.row(i).to_vec();
1444 let expected = crate::distance::euclidean(&p1, &p2);
1445 assert_relative_eq!(distances[i], expected, epsilon = 1e-10);
1446 }
1447 }
1448
1449 #[test]
1450 #[ignore]
1451 fn test_parallel_pdist() {
1452 let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
1453
1454 let distances = parallel_pdist(&points.view(), "euclidean").unwrap();
1455
1456 assert_eq!(distances.len(), 6);
1458
1459 for &dist in distances.iter() {
1461 assert!(dist > 0.0);
1462 assert!(dist.is_finite());
1463 }
1464 }
1465
1466 #[test]
1467 #[ignore]
1468 fn test_parallel_cdist() {
1469 let points1 = array![[0.0, 0.0], [1.0, 1.0]];
1470 let points2 = array![[1.0, 0.0], [0.0, 1.0], [2.0, 2.0]];
1471
1472 let distances = parallel_cdist(&points1.view(), &points2.view(), "euclidean").unwrap();
1473
1474 assert_eq!(distances.shape(), &[2, 3]);
1475
1476 for &dist in distances.iter() {
1478 assert!(dist >= 0.0);
1479 assert!(dist.is_finite());
1480 }
1481 }
1482
1483 #[test]
1484 #[ignore]
1485 fn test_simd_knn_search() {
1486 let data_points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0], [2.0, 2.0]];
1487 let query_points = array![[0.5, 0.5], [1.5, 1.5]];
1488
1489 let (indices, distances) =
1490 simd_knn_search(&query_points.view(), &data_points.view(), 3, "euclidean").unwrap();
1491
1492 assert_eq!(indices.shape(), &[2, 3]);
1493 assert_eq!(distances.shape(), &[2, 3]);
1494
1495 for row in distances.outer_iter() {
1497 for i in 1..row.len() {
1498 assert!(row[i] >= row[i - 1]);
1499 }
1500 }
1501
1502 for &idx in indices.iter() {
1504 assert!(idx < data_points.nrows());
1505 }
1506 }
1507
1508 #[test]
1509 fn test_linear_to_condensed_indices() {
1510 let n = 4;
1512 let expected_pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)];
1513
1514 for (linear_idx, expected) in expected_pairs.iter().enumerate() {
1515 let result = linear_to_condensed_indices(linear_idx, n);
1516 assert_eq!(result, *expected);
1517 }
1518 }
1519
1520 #[test]
1521 #[ignore]
1522 fn test_simd_chebyshev_distance() {
1523 let a = vec![1.0, 2.0, 3.0];
1524 let b = vec![4.0, 5.0, 1.0];
1525
1526 let dist = simd_chebyshev_distance(&a, &b).unwrap();
1527
1528 assert_relative_eq!(dist, 3.0, epsilon = 1e-10);
1530 }
1531
1532 #[test]
1533 #[ignore]
1534 fn test_different_metrics() {
1535 let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]];
1536
1537 let metrics = ["euclidean", "manhattan", "sqeuclidean", "chebyshev"];
1538
1539 for metric in &metrics {
1540 let distances = parallel_pdist(&points.view(), metric).unwrap();
1541 assert_eq!(distances.len(), 3); for &dist in distances.iter() {
1544 assert!(dist >= 0.0);
1545 assert!(dist.is_finite());
1546 }
1547 }
1548 }
1549
1550 #[test]
1551 fn test_error_handling() {
1552 let a = vec![1.0, 2.0];
1553 let b = vec![1.0, 2.0, 3.0]; let result = simd_euclidean_distance(&a, &b);
1556 assert!(result.is_err());
1557
1558 let result = simd_manhattan_distance(&a, &b);
1559 assert!(result.is_err());
1560 }
1561
1562 #[test]
1563 #[ignore]
1564 fn test_large_dimension_vectors() {
1565 let a = vec![0.0, 1.0, 2.0];
1567 let b = vec![1.0, 2.0, 3.0];
1568
1569 let simd_dist = simd_euclidean_distance(&a, &b).unwrap();
1570 let scalar_dist = crate::distance::euclidean(&a, &b);
1571
1572 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1574
1575 let dim = 1000; let a: Vec<f64> = (0..dim).map(|i| i as f64).collect();
1578 let b: Vec<f64> = (0..dim).map(|i| (i + 1) as f64).collect();
1579
1580 let simd_dist = simd_euclidean_distance(&a, &b).unwrap();
1581 let scalar_dist = crate::distance::euclidean(&a, &b);
1582
1583 assert_relative_eq!(simd_dist, scalar_dist, epsilon = 1e-10);
1585 }
1586
1587 #[test]
1588 #[ignore]
1589 fn test_hardware_optimized_distances() {
1590 use super::hardware_specific_simd::HardwareOptimizedDistances;
1591
1592 let optimizer = HardwareOptimizedDistances::new();
1593
1594 let a = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
1595 let b = array![2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
1596
1597 let optimized_dist = optimizer
1598 .euclidean_distance_optimized(&a.view(), &b.view())
1599 .unwrap();
1600 let scalar_dist = crate::distance::euclidean(a.as_slice().unwrap(), b.as_slice().unwrap());
1601
1602 assert_relative_eq!(optimized_dist, scalar_dist, epsilon = 1e-10);
1603 }
1604
1605 #[test]
1606 #[ignore]
1607 fn test_hardware_optimized_batch_matrix() {
1608 let points = array![
1609 [0.0, 0.0],
1610 [1.0, 0.0],
1611 [0.0, 1.0],
1612 [1.0, 1.0],
1613 [2.0, 2.0],
1614 [3.0, 3.0],
1615 [4.0, 4.0],
1616 [5.0, 5.0]
1617 ];
1618
1619 let optimizer = HardwareOptimizedDistances::new();
1620 let result = optimizer.batch_distance_matrix_optimized(&points.view());
1621
1622 assert!(result.is_ok());
1623 let matrix = result.unwrap();
1624 assert_eq!(matrix.dim(), (8, 8));
1625
1626 for i in 0..8 {
1628 assert_relative_eq!(matrix[[i, i]], 0.0, epsilon = 1e-10);
1629 }
1630
1631 for i in 0..8 {
1633 for j in 0..8 {
1634 assert_relative_eq!(matrix[[i, j]], matrix[[j, i]], epsilon = 1e-10);
1635 }
1636 }
1637 }
1638
1639 #[test]
1640 #[ignore]
1641 fn test_hardware_optimized_knn() {
1642 let data_points = array![
1643 [0.0, 0.0],
1644 [1.0, 0.0],
1645 [0.0, 1.0],
1646 [1.0, 1.0],
1647 [2.0, 2.0],
1648 [3.0, 3.0],
1649 [4.0, 4.0],
1650 [5.0, 5.0]
1651 ];
1652 let query_points = array![[0.5, 0.5], [2.5, 2.5]];
1653
1654 let optimizer = HardwareOptimizedDistances::new();
1655 let result = optimizer.knn_search_vectorized(&query_points.view(), &data_points.view(), 3);
1656
1657 assert!(result.is_ok());
1658 let (indices, distances) = result.unwrap();
1659
1660 assert_eq!(indices.dim(), (2, 3));
1661 assert_eq!(distances.dim(), (2, 3));
1662
1663 for row in distances.outer_iter() {
1665 for i in 1..row.len() {
1666 assert!(row[i] >= row[i - 1]);
1667 }
1668 }
1669 }
1670
1671 #[test]
1672 #[ignore]
1673 fn test_mixed_precision_adaptive() {
1674 use super::mixed_precision_simd::adaptive_precision_distance_matrix;
1675
1676 let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
1678
1679 let result = adaptive_precision_distance_matrix(&points.view(), 1e6);
1680 assert!(result.is_ok());
1681
1682 let matrix = result.unwrap();
1683 assert_eq!(matrix.dim(), (4, 4));
1684
1685 for i in 0..4 {
1687 assert_relative_eq!(matrix[[i, i]], 0.0, epsilon = 1e-6);
1688 }
1689 }
1690
1691 #[test]
1692 fn test_simd_capabilities_reporting() {
1693 let optimizer = HardwareOptimizedDistances::new();
1694
1695 optimizer.report_capabilities();
1697
1698 let block_size = optimizer.optimal_simd_block_size();
1699 assert!((2..=8).contains(&block_size));
1700 }
1701}