use crate::error::{SpatialError, SpatialResult};
use scirs2_core::ndarray::{Array2, ArrayView1};
use scirs2_core::numeric::Float;
use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
use std::marker::PhantomData;

/// Portable prefetch hint: routing the slice through `black_box` keeps the
/// access visible to the optimizer without requiring platform intrinsics.
#[inline(always)]
fn prefetch_read<T>(data: &[T]) {
    std::hint::black_box(data);
}

/// Hint that `data` will be streamed (read once) rather than reused.
#[inline(always)]
fn streaming_load_hint<T>(data: &[T]) {
    std::hint::black_box(data);
}

#[inline(always)]
#[must_use]
fn fma_f64(a: f64, b: f64, c: f64) -> f64 {
    a.mul_add(b, c)
}

#[inline(always)]
#[must_use]
fn fma_f32(a: f32, b: f32, c: f32) -> f32 {
    a.mul_add(b, c)
}

/// Cache-line-aligned buffer wrapper. The alignment applies to the struct
/// itself; the heap storage of the inner `Vec` uses the allocator's alignment.
#[repr(C, align(64))]
#[derive(Debug, Clone)]
pub struct CacheAlignedBuffer<T> {
    data: Vec<T>,
}

impl<T> CacheAlignedBuffer<T> {
    #[inline(always)]
    #[must_use]
    pub fn new_with_capacity(capacity: usize) -> Self {
        Self {
            data: Vec::with_capacity(capacity),
        }
    }

    #[inline(always)]
    #[must_use]
    pub fn as_slice(&self) -> &[T] {
        &self.data
    }

    #[inline(always)]
    #[must_use]
    pub fn as_mut_slice(&mut self) -> &mut [T] {
        &mut self.data
    }

    #[inline(always)]
    pub fn push(&mut self, value: T) {
        self.data.push(value);
    }

    #[inline(always)]
    #[must_use]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    #[inline(always)]
    pub fn resize(&mut self, new_len: usize, value: T)
    where
        T: Clone,
    {
        self.data.resize(new_len, value);
    }
}

pub trait Distance<T: Float>: Clone + Send + Sync {
    fn distance(&self, a: &[T], b: &[T]) -> T;

    fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T;
}

#[derive(Clone, Debug)]
pub struct EuclideanDistance<T: Float>(PhantomData<T>);

impl<T: Float> EuclideanDistance<T> {
    pub fn new() -> Self {
        EuclideanDistance(PhantomData)
    }

    #[inline]
    fn try_simd_f64(a: &[T], b: &[T]) -> Option<T> {
        if std::mem::size_of::<T>() != std::mem::size_of::<f64>() {
            return None;
        }

        unsafe {
            let a_ptr = a.as_ptr() as *const f64;
            let b_ptr = b.as_ptr() as *const f64;
            let len = a.len();
            let a_slice = std::slice::from_raw_parts(a_ptr, len);
            let b_slice = std::slice::from_raw_parts(b_ptr, len);
            let a_view = ArrayView1::from(a_slice);
            let b_view = ArrayView1::from(b_slice);

            let result = f64::simd_distance_euclidean(&a_view, &b_view);

            let result_ptr = &result as *const f64 as *const T;
            Some(*result_ptr)
        }
    }

    #[inline]
    fn try_simd_f32(a: &[T], b: &[T]) -> Option<T> {
        if std::mem::size_of::<T>() != std::mem::size_of::<f32>() {
            return None;
        }

        unsafe {
            let a_ptr = a.as_ptr() as *const f32;
            let b_ptr = b.as_ptr() as *const f32;
            let len = a.len();
            let a_slice = std::slice::from_raw_parts(a_ptr, len);
            let b_slice = std::slice::from_raw_parts(b_ptr, len);
            let a_view = ArrayView1::from(a_slice);
            let b_view = ArrayView1::from(b_slice);

            let result = f32::simd_distance_euclidean(&a_view, &b_view);

            let result_ptr = &result as *const f32 as *const T;
            Some(*result_ptr)
        }
    }
}

impl<T: Float> Default for EuclideanDistance<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T: Float + Send + Sync> Distance<T> for EuclideanDistance<T> {
    fn distance(&self, a: &[T], b: &[T]) -> T {
        if a.len() != b.len() {
            return T::nan();
        }

        if let Some(result) = Self::try_simd_f64(a, b) {
            return result;
        }
        if let Some(result) = Self::try_simd_f32(a, b) {
            return result;
        }

        let len = a.len();
        let mut sum = T::zero();

        let chunks = len / 4;

        #[allow(clippy::needless_range_loop)]
        for i in 0..chunks {
            let base = i * 4;

            if base + 8 < len {
                let end_idx = (base + 8).min(len);
                prefetch_read(&a[base + 4..end_idx]);
                prefetch_read(&b[base + 4..end_idx]);

                if base + 16 < len {
                    let far_end = (base + 16).min(len);
                    prefetch_read(&a[base + 8..far_end]);
                    prefetch_read(&b[base + 8..far_end]);
                }
            }

            let diff0 = a[base] - b[base];
            let diff1 = a[base + 1] - b[base + 1];
            let diff2 = a[base + 2] - b[base + 2];
            let diff3 = a[base + 3] - b[base + 3];

            let sq0 = diff0 * diff0;
            let sq1 = diff1 * diff1;
            let sq2 = diff2 * diff2;
            let sq3 = diff3 * diff3;

            let pair_sum0 = sq0 + sq1;
            let pair_sum1 = sq2 + sq3;
            let chunk_sum = pair_sum0 + pair_sum1;

            sum = sum + chunk_sum;
        }

        for i in (chunks * 4)..len {
            let diff = a[i] - b[i];
            sum = sum + diff * diff;
        }

        sum.sqrt()
    }

    fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T {
        let mut sum = T::zero();

        for i in 0..point.len() {
            let coord = point[i];
            let min_val = mins[i];
            let max_val = maxes[i];

            let clamped = coord.max(min_val).min(max_val);
            let diff = coord - clamped;
            sum = sum + diff * diff;
        }

        sum.sqrt()
    }
}
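
// Illustrative sketch (not part of the original API): the `Distance` trait in
// action with `EuclideanDistance`. The 3-4-5 pair gives 5.0, and a point inside
// a rectangle has a zero lower-bound distance to it.
#[allow(dead_code)]
fn distance_trait_example() {
    let metric = EuclideanDistance::<f64>::new();

    let d = metric.distance(&[0.0, 0.0], &[3.0, 4.0]);
    debug_assert!((d - 5.0).abs() < 1e-12);

    let lower = metric.min_distance_point_rectangle(&[0.5, 0.5], &[0.0, 0.0], &[1.0, 1.0]);
    debug_assert!(lower.abs() < 1e-12);
}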

#[derive(Clone, Debug)]
pub struct ManhattanDistance<T: Float>(PhantomData<T>);

impl<T: Float> ManhattanDistance<T> {
    pub fn new() -> Self {
        ManhattanDistance(PhantomData)
    }

    #[inline]
    fn try_simd_f64(a: &[T], b: &[T]) -> Option<T> {
        if std::mem::size_of::<T>() != std::mem::size_of::<f64>() {
            return None;
        }

        unsafe {
            let a_ptr = a.as_ptr() as *const f64;
            let b_ptr = b.as_ptr() as *const f64;
            let len = a.len();
            let a_slice = std::slice::from_raw_parts(a_ptr, len);
            let b_slice = std::slice::from_raw_parts(b_ptr, len);
            let a_view = ArrayView1::from(a_slice);
            let b_view = ArrayView1::from(b_slice);

            let result = f64::simd_distance_manhattan(&a_view, &b_view);
            let result_ptr = &result as *const f64 as *const T;
            Some(*result_ptr)
        }
    }

    #[inline]
    fn try_simd_f32(a: &[T], b: &[T]) -> Option<T> {
        if std::mem::size_of::<T>() != std::mem::size_of::<f32>() {
            return None;
        }

        unsafe {
            let a_ptr = a.as_ptr() as *const f32;
            let b_ptr = b.as_ptr() as *const f32;
            let len = a.len();
            let a_slice = std::slice::from_raw_parts(a_ptr, len);
            let b_slice = std::slice::from_raw_parts(b_ptr, len);
            let a_view = ArrayView1::from(a_slice);
            let b_view = ArrayView1::from(b_slice);

            let result = f32::simd_distance_manhattan(&a_view, &b_view);
            let result_ptr = &result as *const f32 as *const T;
            Some(*result_ptr)
        }
    }
}

impl<T: Float> Default for ManhattanDistance<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T: Float + Send + Sync> Distance<T> for ManhattanDistance<T> {
    fn distance(&self, a: &[T], b: &[T]) -> T {
        if a.len() != b.len() {
            return T::nan();
        }

        if let Some(result) = Self::try_simd_f64(a, b) {
            return result;
        }
        if let Some(result) = Self::try_simd_f32(a, b) {
            return result;
        }

        let len = a.len();
        let mut sum = T::zero();

        let chunks = len / 4;

        for i in 0..chunks {
            let base = i * 4;

            if base + 8 < len {
                let end_idx = (base + 8).min(len);
                prefetch_read(&a[base + 4..end_idx]);
                prefetch_read(&b[base + 4..end_idx]);
            }

            let diff0_abs = (a[base] - b[base]).abs();
            let diff1_abs = (a[base + 1] - b[base + 1]).abs();
            let diff2_abs = (a[base + 2] - b[base + 2]).abs();
            let diff3_abs = (a[base + 3] - b[base + 3]).abs();

            sum = sum + diff0_abs + diff1_abs + diff2_abs + diff3_abs;
        }

        for i in (chunks * 4)..len {
            sum = sum + (a[i] - b[i]).abs();
        }

        sum
    }

    fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T {
        let mut sum = T::zero();

        for i in 0..point.len() {
            let coord = point[i];
            let min_val = mins[i];
            let max_val = maxes[i];

            let clamped = coord.max(min_val).min(max_val);
            let diff = (coord - clamped).abs();
            sum = sum + diff;
        }

        sum
    }
}

#[derive(Clone, Debug)]
pub struct ChebyshevDistance<T: Float>(PhantomData<T>);

impl<T: Float> ChebyshevDistance<T> {
    pub fn new() -> Self {
        ChebyshevDistance(PhantomData)
    }

    #[inline]
    fn try_simd_f64(a: &[T], b: &[T]) -> Option<T> {
        if std::mem::size_of::<T>() != std::mem::size_of::<f64>() {
            return None;
        }

        unsafe {
            let a_ptr = a.as_ptr() as *const f64;
            let b_ptr = b.as_ptr() as *const f64;
            let len = a.len();
            let a_slice = std::slice::from_raw_parts(a_ptr, len);
            let b_slice = std::slice::from_raw_parts(b_ptr, len);
            let a_view = ArrayView1::from(a_slice);
            let b_view = ArrayView1::from(b_slice);

            let result = f64::simd_distance_chebyshev(&a_view, &b_view);
            let result_ptr = &result as *const f64 as *const T;
            Some(*result_ptr)
        }
    }

    #[inline]
    fn try_simd_f32(a: &[T], b: &[T]) -> Option<T> {
        if std::mem::size_of::<T>() != std::mem::size_of::<f32>() {
            return None;
        }

        unsafe {
            let a_ptr = a.as_ptr() as *const f32;
            let b_ptr = b.as_ptr() as *const f32;
            let len = a.len();
            let a_slice = std::slice::from_raw_parts(a_ptr, len);
            let b_slice = std::slice::from_raw_parts(b_ptr, len);
            let a_view = ArrayView1::from(a_slice);
            let b_view = ArrayView1::from(b_slice);

            let result = f32::simd_distance_chebyshev(&a_view, &b_view);
            let result_ptr = &result as *const f32 as *const T;
            Some(*result_ptr)
        }
    }
}

impl<T: Float> Default for ChebyshevDistance<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T: Float + Send + Sync> Distance<T> for ChebyshevDistance<T> {
    fn distance(&self, a: &[T], b: &[T]) -> T {
        if a.len() != b.len() {
            return T::nan();
        }

        if let Some(result) = Self::try_simd_f64(a, b) {
            return result;
        }
        if let Some(result) = Self::try_simd_f32(a, b) {
            return result;
        }

        let len = a.len();
        let mut max_diff = T::zero();

        let chunks = len / 4;

        for i in 0..chunks {
            let base = i * 4;

            if base + 8 < len {
                let end_idx = (base + 8).min(len);
                prefetch_read(&a[base + 4..end_idx]);
                prefetch_read(&b[base + 4..end_idx]);
            }

            let diff0 = (a[base] - b[base]).abs();
            let diff1 = (a[base + 1] - b[base + 1]).abs();
            let diff2 = (a[base + 2] - b[base + 2]).abs();
            let diff3 = (a[base + 3] - b[base + 3]).abs();

            let max01 = diff0.max(diff1);
            let max23 = diff2.max(diff3);
            let chunk_max = max01.max(max23);
            max_diff = max_diff.max(chunk_max);
        }

        for i in (chunks * 4)..len {
            let diff = (a[i] - b[i]).abs();
            if diff > max_diff {
                max_diff = diff;
            }
        }

        max_diff
    }

    fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T {
        let mut max_diff = T::zero();

        for i in 0..point.len() {
            let coord = point[i];
            let min_val = mins[i];
            let max_val = maxes[i];

            let clamped = coord.max(min_val).min(max_val);
            let diff = (coord - clamped).abs();

            max_diff = max_diff.max(diff);
        }

        max_diff
    }
}

#[derive(Clone, Debug)]
pub struct MinkowskiDistance<T: Float> {
    p: T,
    phantom: PhantomData<T>,
}

impl<T: Float> MinkowskiDistance<T> {
    pub fn new(p: T) -> Self {
        MinkowskiDistance {
            p,
            phantom: PhantomData,
        }
    }
}

impl<T: Float + Send + Sync> Distance<T> for MinkowskiDistance<T> {
    fn distance(&self, a: &[T], b: &[T]) -> T {
        if a.len() != b.len() {
            return T::nan();
        }

        if self.p == T::one() {
            let mut sum = T::zero();
            for i in 0..a.len() {
                sum = sum + (a[i] - b[i]).abs();
            }
            sum
        } else if self.p == T::from(2.0).expect("Operation failed") {
            let mut sum = T::zero();
            for i in 0..a.len() {
                let diff = a[i] - b[i];
                sum = sum + diff * diff;
            }
            sum.sqrt()
        } else if self.p == T::infinity() {
            let mut max_diff = T::zero();
            for i in 0..a.len() {
                let diff = (a[i] - b[i]).abs();
                if diff > max_diff {
                    max_diff = diff;
                }
            }
            max_diff
        } else {
            let mut sum = T::zero();
            for i in 0..a.len() {
                sum = sum + (a[i] - b[i]).abs().powf(self.p);
            }
            sum.powf(T::one() / self.p)
        }
    }

    fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T {
        if self.p == T::one() {
            let mut sum = T::zero();
            for i in 0..point.len() {
                if point[i] < mins[i] {
                    sum = sum + (mins[i] - point[i]);
                } else if point[i] > maxes[i] {
                    sum = sum + (point[i] - maxes[i]);
                }
            }
            sum
        } else if self.p == T::from(2.0).expect("Operation failed") {
            let mut sum = T::zero();
            for i in 0..point.len() {
                if point[i] < mins[i] {
                    let diff = mins[i] - point[i];
                    sum = sum + diff * diff;
                } else if point[i] > maxes[i] {
                    let diff = point[i] - maxes[i];
                    sum = sum + diff * diff;
                }
            }
            sum.sqrt()
        } else if self.p == T::infinity() {
            let mut max_diff = T::zero();
            for i in 0..point.len() {
                let diff = if point[i] < mins[i] {
                    mins[i] - point[i]
                } else if point[i] > maxes[i] {
                    point[i] - maxes[i]
                } else {
                    T::zero()
                };

                if diff > max_diff {
                    max_diff = diff;
                }
            }
            max_diff
        } else {
            let mut sum = T::zero();
            for i in 0..point.len() {
                let diff = if point[i] < mins[i] {
                    mins[i] - point[i]
                } else if point[i] > maxes[i] {
                    point[i] - maxes[i]
                } else {
                    T::zero()
                };

                sum = sum + diff.powf(self.p);
            }
            sum.powf(T::one() / self.p)
        }
    }
}
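
// Illustrative sketch: the Minkowski parameter `p` collapses onto the other
// metrics, so p = 1 matches Manhattan (7.0), p = 2 matches Euclidean (5.0), and
// p = infinity matches Chebyshev (4.0) for the same pair of points.
#[allow(dead_code)]
fn minkowski_special_cases_example() {
    let a = [0.0f64, 0.0];
    let b = [3.0f64, 4.0];

    debug_assert!((MinkowskiDistance::new(1.0).distance(&a, &b) - 7.0).abs() < 1e-12);
    debug_assert!((MinkowskiDistance::new(2.0).distance(&a, &b) - 5.0).abs() < 1e-12);
    debug_assert!((MinkowskiDistance::new(f64::INFINITY).distance(&a, &b) - 4.0).abs() < 1e-12);
}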

#[allow(dead_code)]
pub fn euclidean<T: Float + Send + Sync>(point1: &[T], point2: &[T]) -> T {
    let metric = EuclideanDistance::<T>::new();
    metric.distance(point1, point2)
}

#[allow(dead_code)]
pub fn sqeuclidean<T: Float>(point1: &[T], point2: &[T]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut sum = T::zero();
    for i in 0..point1.len() {
        let diff = point1[i] - point2[i];
        sum = sum + diff * diff;
    }
    sum
}

#[allow(dead_code)]
pub fn manhattan<T: Float + Send + Sync>(point1: &[T], point2: &[T]) -> T {
    let metric = ManhattanDistance::<T>::new();
    metric.distance(point1, point2)
}

#[allow(dead_code)]
pub fn chebyshev<T: Float + Send + Sync>(point1: &[T], point2: &[T]) -> T {
    let metric = ChebyshevDistance::<T>::new();
    metric.distance(point1, point2)
}

#[allow(dead_code)]
pub fn minkowski<T: Float + Send + Sync>(point1: &[T], point2: &[T], p: T) -> T {
    let metric = MinkowskiDistance::new(p);
    metric.distance(point1, point2)
}
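
// Illustrative sketch comparing the convenience wrappers on the 3-4-5 pair
// (expected values are noted inline).
#[allow(dead_code)]
fn metric_wrappers_example() {
    let a = [0.0f64, 0.0];
    let b = [3.0f64, 4.0];

    debug_assert!((euclidean(&a, &b) - 5.0).abs() < 1e-12);
    debug_assert!((sqeuclidean(&a, &b) - 25.0).abs() < 1e-12);
    debug_assert!((manhattan(&a, &b) - 7.0).abs() < 1e-12);
    debug_assert!((chebyshev(&a, &b) - 4.0).abs() < 1e-12);
    // p = 3 gives (3^3 + 4^3)^(1/3) = 91^(1/3).
    debug_assert!((minkowski(&a, &b, 3.0) - 91.0f64.powf(1.0 / 3.0)).abs() < 1e-9);
}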

#[allow(dead_code)]
pub fn canberra<T: Float>(point1: &[T], point2: &[T]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut sum = T::zero();
    for i in 0..point1.len() {
        let num = (point1[i] - point2[i]).abs();
        let denom = point1[i].abs() + point2[i].abs();
        if num > T::zero() && denom > T::zero() {
            sum = sum + num / denom;
        }
    }

    sum
}

#[allow(dead_code)]
pub fn cosine<T: Float>(point1: &[T], point2: &[T]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut dot_product = T::zero();
    let mut norm_x = T::zero();
    let mut norm_y = T::zero();

    for i in 0..point1.len() {
        dot_product = dot_product + point1[i] * point2[i];
        norm_x = norm_x + point1[i] * point1[i];
        norm_y = norm_y + point2[i] * point2[i];
    }

    if norm_x.is_zero() || norm_y.is_zero() {
        T::zero()
    } else {
        T::one() - dot_product / (norm_x.sqrt() * norm_y.sqrt())
    }
}

#[allow(dead_code)]
pub fn correlation<T: Float>(point1: &[T], point2: &[T]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let n = point1.len();
    if n <= 1 {
        return T::zero();
    }

    let mut mean1 = T::zero();
    let mut mean2 = T::zero();
    for i in 0..n {
        mean1 = mean1 + point1[i];
        mean2 = mean2 + point2[i];
    }
    mean1 = mean1 / T::from(n).expect("Operation failed");
    mean2 = mean2 / T::from(n).expect("Operation failed");

    let mut point1_centered = vec![T::zero(); n];
    let mut point2_centered = vec![T::zero(); n];
    for i in 0..n {
        point1_centered[i] = point1[i] - mean1;
        point2_centered[i] = point2[i] - mean2;
    }

    cosine(&point1_centered, &point2_centered)
}
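
// Illustrative sketch: cosine distance is 1 for orthogonal vectors, and
// correlation distance is the cosine distance of the mean-centered vectors
// (2.0 for perfectly anti-correlated inputs).
#[allow(dead_code)]
fn cosine_correlation_example() {
    let c: f64 = cosine(&[1.0, 0.0], &[0.0, 1.0]);
    debug_assert!((c - 1.0).abs() < 1e-12);

    let r: f64 = correlation(&[1.0, 2.0], &[2.0, 1.0]);
    debug_assert!((r - 2.0).abs() < 1e-12);
}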

#[allow(dead_code)]
pub fn mahalanobis<T: Float>(point1: &[T], point2: &[T], vi: &Array2<T>) -> T {
    if point1.len() != point2.len() || vi.ncols() != point1.len() || vi.nrows() != point1.len() {
        return T::nan();
    }

    let mut diff = Vec::with_capacity(point1.len());
    for i in 0..point1.len() {
        diff.push(point1[i] - point2[i]);
    }

    let mut result = vec![T::zero(); point1.len()];
    for i in 0..vi.nrows() {
        for j in 0..vi.ncols() {
            result[i] = result[i] + diff[j] * vi[[i, j]];
        }
    }

    let mut sum = T::zero();
    for i in 0..point1.len() {
        sum = sum + result[i] * diff[i];
    }

    sum.sqrt()
}
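
// Illustrative sketch: with the identity matrix as the inverse covariance,
// Mahalanobis distance reduces to plain Euclidean distance (5.0 here).
#[allow(dead_code)]
fn mahalanobis_identity_example() {
    let vi: Array2<f64> = Array2::eye(2);
    let d = mahalanobis(&[0.0, 0.0], &[3.0, 4.0], &vi);
    debug_assert!((d - 5.0).abs() < 1e-12);
}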

#[allow(dead_code)]
pub fn seuclidean<T: Float>(point1: &[T], point2: &[T], variance: &[T]) -> T {
    if point1.len() != point2.len() || point1.len() != variance.len() {
        return T::nan();
    }

    let mut sum = T::zero();
    for i in 0..point1.len() {
        let diff = point1[i] - point2[i];
        let v = if variance[i] > T::zero() {
            variance[i]
        } else {
            T::one()
        };
        sum = sum + (diff * diff) / v;
    }

    sum.sqrt()
}

#[allow(dead_code)]
pub fn braycurtis<T: Float>(point1: &[T], point2: &[T]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut sum_abs_diff = T::zero();
    let mut sum_abs_sum = T::zero();

    for i in 0..point1.len() {
        sum_abs_diff = sum_abs_diff + (point1[i] - point2[i]).abs();
        sum_abs_sum = sum_abs_sum + (point1[i] + point2[i]).abs();
    }

    if sum_abs_sum > T::zero() {
        sum_abs_diff / sum_abs_sum
    } else {
        T::zero()
    }
}

#[allow(dead_code)]
pub fn jaccard<T: Float>(point1: &[T], point2: &[T]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut n_true_true = T::zero();
    let mut n_false_true = T::zero();
    let mut n_true_false = T::zero();

    for i in 0..point1.len() {
        let is_p1_true = point1[i] > T::zero();
        let is_p2_true = point2[i] > T::zero();

        if is_p1_true && is_p2_true {
            n_true_true = n_true_true + T::one();
        } else if !is_p1_true && is_p2_true {
            n_false_true = n_false_true + T::one();
        } else if is_p1_true && !is_p2_true {
            n_true_false = n_true_false + T::one();
        }
    }

    if n_true_true + n_false_true + n_true_false == T::zero() {
        T::zero()
    } else {
        (n_false_true + n_true_false) / (n_true_true + n_false_true + n_true_false)
    }
}
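
// Illustrative sketch: this `jaccard` variant treats any value > 0 as "true";
// one shared "true" position and two mismatches give a dissimilarity of 2/3.
#[allow(dead_code)]
fn jaccard_example() {
    let d: f64 = jaccard(&[1.0, 1.0, 0.0], &[1.0, 0.0, 1.0]);
    debug_assert!((d - 2.0 / 3.0).abs() < 1e-12);
}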

#[allow(dead_code)]
pub fn pdist<T, F>(x: &Array2<T>, metric: F) -> Array2<T>
where
    T: Float + std::fmt::Debug,
    F: Fn(&[T], &[T]) -> T,
{
    let n = x.nrows();
    let mut result = Array2::zeros((n, n));

    for i in 0..n {
        result[(i, i)] = T::zero();
        let row_i = x.row(i).to_vec();

        for j in (i + 1)..n {
            let row_j = x.row(j).to_vec();
            let dist = metric(&row_i, &row_j);
            result[(i, j)] = dist;
            result[(j, i)] = dist;
        }
    }

    result
}
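
// Usage sketch (illustrative values): build the full symmetric distance matrix
// for three 2-D points with the generic `pdist` and the `euclidean` helper.
#[allow(dead_code)]
fn pdist_example() -> Array2<f64> {
    let pts = Array2::from_shape_vec((3, 2), vec![0.0, 0.0, 3.0, 4.0, 0.0, 8.0])
        .expect("3 x 2 shape matches the 6 supplied values");
    // dist(p0, p1) = 5, dist(p0, p2) = 8, dist(p1, p2) = 5.
    pdist(&pts, euclidean::<f64>)
}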

pub fn pdist_optimized<T, F>(x: &Array2<T>, metric: F) -> Array2<T>
where
    T: Float + std::fmt::Debug,
    F: Fn(ArrayView1<T>, ArrayView1<T>) -> T,
{
    let n = x.nrows();
    let mut result = Array2::zeros((n, n));

    for i in 0..n {
        result[(i, i)] = T::zero();
        let row_i = x.row(i);

        for j in (i + 1)..n {
            let row_j = x.row(j);
            let dist = metric(row_i, row_j);
            result[(i, j)] = dist;
            result[(j, i)] = dist;
        }
    }

    result
}

pub fn euclidean_view<T>(a: ArrayView1<T>, b: ArrayView1<T>) -> T
where
    T: Float + std::fmt::Debug,
{
    a.iter()
        .zip(b.iter())
        .map(|(&ai, &bi)| (ai - bi) * (ai - bi))
        .fold(T::zero(), |acc, x| acc + x)
        .sqrt()
}

pub fn euclidean_view_simd_f64(a: ArrayView1<f64>, b: ArrayView1<f64>) -> f64 {
    let diff = f64::simd_sub(&a, &b);
    let squared = f64::simd_mul(&diff.view(), &diff.view());
    let sum = f64::simd_sum(&squared.view());
    sum.sqrt()
}

/// Specialized f64 Euclidean distance with 8-way unrolling and fused multiply-add.
///
/// # Safety
///
/// On x86_64 this function is compiled with the `fma`, `avx`, and `avx2` target
/// features enabled (and `neon` on aarch64), so callers must ensure those
/// features are available on the executing CPU.
#[must_use]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "fma,avx,avx2"))]
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
pub unsafe fn euclidean_distance_f64_specialized(a: &[f64], b: &[f64]) -> f64 {
    debug_assert_eq!(a.len(), b.len());

    let len = a.len();
    let mut sum = 0.0f64;

    let chunks = len / 8;

    #[allow(clippy::needless_range_loop)]
    for i in 0..chunks {
        let base = i * 8;

        if base + 16 < len {
            prefetch_read(&a[base + 8..base + 16]);
            prefetch_read(&b[base + 8..base + 16]);
        }

        let d0 = a[base] - b[base];
        let d1 = a[base + 1] - b[base + 1];
        let d2 = a[base + 2] - b[base + 2];
        let d3 = a[base + 3] - b[base + 3];
        let d4 = a[base + 4] - b[base + 4];
        let d5 = a[base + 5] - b[base + 5];
        let d6 = a[base + 6] - b[base + 6];
        let d7 = a[base + 7] - b[base + 7];

        sum = fma_f64(d0, d0, sum);
        sum = fma_f64(d1, d1, sum);
        sum = fma_f64(d2, d2, sum);
        sum = fma_f64(d3, d3, sum);
        sum = fma_f64(d4, d4, sum);
        sum = fma_f64(d5, d5, sum);
        sum = fma_f64(d6, d6, sum);
        sum = fma_f64(d7, d7, sum);
    }

    for i in (chunks * 8)..len {
        let diff = a[i] - b[i];
        sum = fma_f64(diff, diff, sum);
    }

    sum.sqrt()
}

#[inline(always)]
#[must_use]
pub fn euclidean_distance_f32_specialized(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());

    let len = a.len();
    let mut sum = 0.0f32;

    let chunks = len / 16;

    #[allow(clippy::needless_range_loop)]
    for i in 0..chunks {
        let base = i * 16;

        if base + 32 < len {
            prefetch_read(&a[base + 16..base + 32]);
            prefetch_read(&b[base + 16..base + 32]);
        }

        let mut chunk_sum = 0.0f32;
        for j in 0..16 {
            let diff = a[base + j] - b[base + j];
            chunk_sum = fma_f32(diff, diff, chunk_sum);
        }

        sum += chunk_sum;
    }

    for i in (chunks * 16)..len {
        let diff = a[i] - b[i];
        sum = fma_f32(diff, diff, sum);
    }

    sum.sqrt()
}
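
// Illustrative sketch: the safe f32 fast path can be called directly and agrees
// with the f64 result to within single precision for the 3-4-5 pair.
#[allow(dead_code)]
fn f32_specialized_example() {
    let a = [0.0f32, 0.0];
    let b = [3.0f32, 4.0];
    let d = euclidean_distance_f32_specialized(&a, &b);
    debug_assert!((d - 5.0).abs() < 1e-6);
}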

#[inline]
#[target_feature(enable = "avx2")]
#[cfg(target_arch = "x86_64")]
unsafe fn pdist_simd_flat_f64_avx2(points: &Array2<f64>) -> Vec<f64> {
    pdist_simd_flat_f64_impl(points)
}

#[inline]
fn pdist_simd_flat_f64_fallback(points: &Array2<f64>) -> Vec<f64> {
    pdist_simd_flat_f64_impl(points)
}

#[inline(always)]
fn pdist_simd_flat_f64_impl(points: &Array2<f64>) -> Vec<f64> {
    let n = points.nrows();

    let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n);
    matrix.resize(n * n, 0.0f64);

    pdist_simd_flat_f64_core(points, matrix.as_mut_slice())
}

#[inline]
#[target_feature(enable = "neon")]
#[cfg(target_arch = "aarch64")]
unsafe fn pdist_simd_flat_f64_neon(points: &Array2<f64>) -> Vec<f64> {
    pdist_simd_flat_f64_impl(points)
}

#[inline]
fn pdist_small_matrix_f64(points: &Array2<f64>) -> Vec<f64> {
    let n = points.nrows();
    let mut matrix = vec![0.0f64; n * n];

    match n {
        1 => {
            matrix[0] = 0.0;
        }
        2 => {
            matrix[0] = 0.0;
            matrix[3] = 0.0;
            let dist = unsafe {
                euclidean_distance_f64_specialized(
                    points.row(0).as_slice().unwrap_or(&[]),
                    points.row(1).as_slice().unwrap_or(&[]),
                )
            };
            matrix[1] = dist;
            matrix[2] = dist;
        }
        3 => {
            matrix[0] = 0.0;
            matrix[4] = 0.0;
            matrix[8] = 0.0;
            let dist_01 = unsafe {
                euclidean_distance_f64_specialized(
                    points.row(0).as_slice().unwrap_or(&[]),
                    points.row(1).as_slice().unwrap_or(&[]),
                )
            };
            let dist_02 = unsafe {
                euclidean_distance_f64_specialized(
                    points.row(0).as_slice().unwrap_or(&[]),
                    points.row(2).as_slice().unwrap_or(&[]),
                )
            };
            let dist_12 = unsafe {
                euclidean_distance_f64_specialized(
                    points.row(1).as_slice().unwrap_or(&[]),
                    points.row(2).as_slice().unwrap_or(&[]),
                )
            };

            matrix[1] = dist_01;
            matrix[3] = dist_01;
            matrix[2] = dist_02;
            matrix[6] = dist_02;
            matrix[5] = dist_12;
            matrix[7] = dist_12;
        }
        4 => {
            for i in 0..4 {
                matrix[i * 4 + i] = 0.0;
            }

            for i in 0..3 {
                for j in (i + 1)..4 {
                    let dist = unsafe {
                        euclidean_distance_f64_specialized(
                            points.row(i).as_slice().unwrap_or(&[]),
                            points.row(j).as_slice().unwrap_or(&[]),
                        )
                    };
                    matrix[i * 4 + j] = dist;
                    matrix[j * 4 + i] = dist;
                }
            }
        }
        _ => {
            return pdist_simd_flat_f64_impl(points);
        }
    }

    matrix
}

pub fn pdist_simd_flat_f64(points: &Array2<f64>) -> Vec<f64> {
    let n = points.nrows();

    if n <= 4 {
        return pdist_small_matrix_f64(points);
    }

    #[cfg(target_arch = "x86_64")]
    {
        let capabilities = PlatformCapabilities::detect();
        if capabilities.avx2_available {
            unsafe { pdist_simd_flat_f64_avx2(points) }
        } else {
            pdist_simd_flat_f64_fallback(points)
        }
    }
    #[cfg(target_arch = "aarch64")]
    {
        let capabilities = PlatformCapabilities::detect();
        if capabilities.neon_available {
            unsafe { pdist_simd_flat_f64_neon(points) }
        } else {
            pdist_simd_flat_f64_fallback(points)
        }
    }
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    {
        pdist_simd_flat_f64_fallback(points)
    }
}
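
// Usage sketch for the flat layout (illustrative values): the returned Vec has
// length n * n in row-major order, so the distance between points i and j is
// stored at index i * n + j.
#[allow(dead_code)]
fn pdist_flat_indexing_example() {
    let pts = Array2::from_shape_vec((3, 2), vec![0.0, 0.0, 3.0, 4.0, 0.0, 8.0])
        .expect("3 x 2 shape matches the 6 supplied values");
    let n = pts.nrows();
    let flat = pdist_simd_flat_f64(&pts);
    debug_assert_eq!(flat.len(), n * n);
    // Index 0 * n + 1 holds dist(p0, p1) = 5.0.
    debug_assert!((flat[1] - 5.0).abs() < 1e-12);
}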

#[inline(always)]
fn pdist_simd_flat_f64_core(points: &Array2<f64>, matrix: &mut [f64]) -> Vec<f64> {
    let n = points.nrows();

    const CACHE_BLOCK_SIZE: usize = 64;

    for i_block in (0..n).step_by(CACHE_BLOCK_SIZE) {
        let i_end = (i_block + CACHE_BLOCK_SIZE).min(n);

        for j_block in (i_block..n).step_by(CACHE_BLOCK_SIZE) {
            let j_end = (j_block + CACHE_BLOCK_SIZE).min(n);

            for i in i_block..i_end {
                if i + 1 < i_end {
                    let next_row = points.row(i + 1);
                    let next_slice = next_row.as_slice().unwrap_or(&[]);
                    streaming_load_hint(next_slice);
                    prefetch_read(next_slice);
                }

                if i + 2 < i_end {
                    let future_base = (i + 2) * n;
                    if future_base + n <= matrix.len() {
                        let write_region = &matrix[future_base..future_base + n.min(64)];
                        streaming_load_hint(write_region);
                        prefetch_read(write_region);
                    }
                }

                let current_row = points.row(i);
                let i_n = i * n;

                for j in j_block.max(i)..j_end {
                    let distance = if i == j {
                        0.0f64
                    } else {
                        let row_j = points.row(j);
                        unsafe {
                            euclidean_distance_f64_specialized(
                                current_row.as_slice().unwrap_or(&[]),
                                row_j.as_slice().unwrap_or(&[]),
                            )
                        }
                    };

                    let flat_idx_ij = i_n + j;
                    let flat_idx_ji = j * n + i;

                    unsafe {
                        *matrix.get_unchecked_mut(flat_idx_ij) = distance;
                        *matrix.get_unchecked_mut(flat_idx_ji) = distance;
                    }
                }
            }
        }
    }

    matrix.to_vec()
}

#[inline(always)]
pub fn euclidean_distance_fixed<const N: usize>(a: &[f64; N], b: &[f64; N]) -> f64 {
    let mut sum = 0.0f64;

    match N {
        1 => {
            let diff = a[0] - b[0];
            sum = diff * diff;
        }
        2 => {
            let diff0 = a[0] - b[0];
            let diff1 = a[1] - b[1];
            sum = diff0 * diff0 + diff1 * diff1;
        }
        3 => {
            let diff0 = a[0] - b[0];
            let diff1 = a[1] - b[1];
            let diff2 = a[2] - b[2];
            sum = diff0 * diff0 + diff1 * diff1 + diff2 * diff2;
        }
        4 => {
            let diff0 = a[0] - b[0];
            let diff1 = a[1] - b[1];
            let diff2 = a[2] - b[2];
            let diff3 = a[3] - b[3];
            sum = diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
        }
        _ => {
            for i in 0..N {
                let diff = a[i] - b[i];
                sum = fma_f64(diff, diff, sum);
            }
        }
    }

    sum.sqrt()
}
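
// Illustrative sketch: the const-generic helper picks a fully unrolled path for
// small fixed dimensions; a (1, 2, 2) offset in 3-D gives exactly 3.0.
#[allow(dead_code)]
fn fixed_dimension_example() {
    let a = [0.0f64, 0.0, 0.0];
    let b = [1.0f64, 2.0, 2.0];
    debug_assert!((euclidean_distance_fixed(&a, &b) - 3.0).abs() < 1e-12);
}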

#[inline(always)]
#[must_use]
pub fn pdist_hierarchical_f64(points: &Array2<f64>) -> Vec<f64> {
    let n = points.nrows();
    let mut matrix = vec![0.0f64; n * n];

    if n <= 4 {
        return pdist_small_matrix_f64(points);
    }

    let mut morton_indices: Vec<(u64, usize)> = Vec::with_capacity(n);
    for i in 0..n {
        let row = points.row(i);
        let morton_code =
            compute_morton_code_2d((row[0] * 1024.0) as u32, (row[1] * 1024.0) as u32);
        morton_indices.push((morton_code, i));
    }

    morton_indices.sort_unstable_by_key(|&(code, _)| code);

    let sorted_indices: Vec<usize> = morton_indices.iter().map(|(_, idx)| *idx).collect();

    for (i_morton, &i) in sorted_indices.iter().enumerate() {
        for &j in sorted_indices.iter().skip(i_morton) {
            let distance = if i == j {
                0.0f64
            } else {
                unsafe {
                    euclidean_distance_f64_specialized(
                        points.row(i).as_slice().unwrap_or(&[]),
                        points.row(j).as_slice().unwrap_or(&[]),
                    )
                }
            };

            matrix[i * n + j] = distance;
            matrix[j * n + i] = distance;
        }
    }

    matrix
}

#[inline(always)]
#[must_use]
fn compute_morton_code_2d(x: u32, y: u32) -> u64 {
    let mut result = 0u64;
    for i in 0..16 {
        // Interleave the low 16 bits: bit i of x goes to bit 2i, bit i of y to bit 2i + 1.
        result |= (((x >> i) & 1) as u64) << (2 * i);
        result |= (((y >> i) & 1) as u64) << (2 * i + 1);
    }
    result
}
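
// Illustrative sketch of the bit interleaving: bit i of `x` lands at position
// 2 * i and bit i of `y` at 2 * i + 1, so (3, 1) encodes to 0b111 = 7.
#[allow(dead_code)]
fn morton_code_example() {
    debug_assert_eq!(compute_morton_code_2d(3, 1), 0b111);
    debug_assert_eq!(compute_morton_code_2d(0, 1), 0b10);
}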

pub fn pdist_adaptive_precision_f64(points: &Array2<f64>, tolerance: f64) -> Vec<f64> {
    let n = points.nrows();
    let mut matrix = vec![0.0f64; n * n];

    if n <= 4 {
        return pdist_small_matrix_f64(points);
    }

    let points_f32: Vec<Vec<f32>> = (0..n)
        .map(|i| points.row(i).iter().map(|&x| x as f32).collect())
        .collect();

    let mut approximate_distances = vec![0.0f32; n * n];
    for i in 0..n {
        for j in (i + 1)..n {
            let dist_f32 = euclidean_distance_f32_fast(&points_f32[i], &points_f32[j]);
            approximate_distances[i * n + j] = dist_f32;
            approximate_distances[j * n + i] = dist_f32;
        }
    }

    let mut sorted_dists = approximate_distances.clone();
    sorted_dists.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let median_dist = sorted_dists[sorted_dists.len() / 2];
    let adaptive_threshold = median_dist * tolerance as f32;

    for i in 0..n {
        for j in (i + 1)..n {
            let approx_dist = approximate_distances[i * n + j];

            let final_distance = if approx_dist > adaptive_threshold {
                unsafe {
                    euclidean_distance_f64_specialized(
                        points.row(i).as_slice().unwrap_or(&[]),
                        points.row(j).as_slice().unwrap_or(&[]),
                    )
                }
            } else {
                approx_dist as f64
            };

            matrix[i * n + j] = final_distance;
            matrix[j * n + i] = final_distance;
        }
    }

    matrix
}

#[inline(always)]
fn euclidean_distance_f32_fast(a: &[f32], b: &[f32]) -> f32 {
    let mut sum = 0.0f32;
    let len = a.len().min(b.len());

    let chunks = len / 4;
    for i in 0..chunks {
        let base = i * 4;
        let diff0 = a[base] - b[base];
        let diff1 = a[base + 1] - b[base + 1];
        let diff2 = a[base + 2] - b[base + 2];
        let diff3 = a[base + 3] - b[base + 3];

        sum = fma_f32(diff0, diff0, sum);
        sum = fma_f32(diff1, diff1, sum);
        sum = fma_f32(diff2, diff2, sum);
        sum = fma_f32(diff3, diff3, sum);
    }

    for i in (chunks * 4)..len {
        let diff = a[i] - b[i];
        sum = fma_f32(diff, diff, sum);
    }

    sum.sqrt()
}

#[inline(always)]
#[must_use]
pub fn pdist_memory_aware_f64(points: &Array2<f64>) -> Vec<f64> {
    let n = points.nrows();
    let mut matrix = vec![0.0f64; n * n];

    if n <= 4 {
        return pdist_small_matrix_f64(points);
    }

    const L1_TILE_SIZE: usize = 8;
    const L2_TILE_SIZE: usize = 32;
    const L3_TILE_SIZE: usize = 128;

    for i_l3 in (0..n).step_by(L3_TILE_SIZE) {
        let i_l3_end = (i_l3 + L3_TILE_SIZE).min(n);

        for j_l3 in (i_l3..n).step_by(L3_TILE_SIZE) {
            let j_l3_end = (j_l3 + L3_TILE_SIZE).min(n);

            for i_l2 in (i_l3..i_l3_end).step_by(L2_TILE_SIZE) {
                let i_l2_end = (i_l2 + L2_TILE_SIZE).min(i_l3_end);

                for j_l2 in (j_l3.max(i_l2)..j_l3_end).step_by(L2_TILE_SIZE) {
                    let j_l2_end = (j_l2 + L2_TILE_SIZE).min(j_l3_end);

                    for i_l1 in (i_l2..i_l2_end).step_by(L1_TILE_SIZE) {
                        let i_l1_end = (i_l1 + L1_TILE_SIZE).min(i_l2_end);

                        for j_l1 in (j_l2.max(i_l1)..j_l2_end).step_by(L1_TILE_SIZE) {
                            let j_l1_end = (j_l1 + L1_TILE_SIZE).min(j_l2_end);

                            process_l1_tile(points, &mut matrix, i_l1, i_l1_end, j_l1, j_l1_end, n);
                        }
                    }
                }
            }
        }
    }

    matrix
}

#[inline(always)]
fn process_l1_tile(
    points: &Array2<f64>,
    matrix: &mut [f64],
    i_start: usize,
    i_end: usize,
    j_start: usize,
    j_end: usize,
    n: usize,
) {
    for i in i_start..i_end {
        let row_i = points.row(i);
        let i_n = i * n;

        if i + 1 < i_end {
            let next_row = points.row(i + 1);
            streaming_load_hint(next_row.as_slice().unwrap_or(&[]));
        }

        for j in j_start.max(i)..j_end {
            let distance = if i == j {
                0.0f64
            } else {
                let row_j = points.row(j);
                unsafe {
                    euclidean_distance_f64_specialized(
                        row_i.as_slice().unwrap_or(&[]),
                        row_j.as_slice().unwrap_or(&[]),
                    )
                }
            };

            let idx_ij = i_n + j;
            let idx_ji = j * n + i;

            unsafe {
                *matrix.get_unchecked_mut(idx_ij) = distance;
                *matrix.get_unchecked_mut(idx_ji) = distance;
            }
        }
    }
}

pub fn pdist_divide_conquer_f64(points: &Array2<f64>) -> Vec<f64> {
    let n = points.nrows();
    let mut matrix = vec![0.0f64; n * n];

    if n <= 32 {
        return pdist_memory_aware_f64(points);
    }

    divide_conquer_recursive(points, &mut matrix, 0, n, 0, n);

    matrix
}

fn divide_conquer_recursive(
    points: &Array2<f64>,
    matrix: &mut [f64],
    i_start: usize,
    i_end: usize,
    j_start: usize,
    j_end: usize,
) {
    let i_size = i_end - i_start;
    let j_size = j_end - j_start;
    let n = points.nrows();

    if i_size <= 32 && j_size <= 32 {
        process_l1_tile(
            points,
            matrix,
            i_start,
            i_end,
            j_start.max(i_start),
            j_end,
            n,
        );
        return;
    }

    if i_size >= j_size {
        let i_mid = i_start + i_size / 2;

        divide_conquer_recursive(points, matrix, i_start, i_mid, j_start, j_end);
        divide_conquer_recursive(points, matrix, i_mid, i_end, j_start, j_end);

        if j_start < i_mid && i_mid < j_end {
            divide_conquer_recursive(points, matrix, i_start, i_mid, i_mid, j_end);
        }
    } else {
        let j_mid = j_start + j_size / 2;

        divide_conquer_recursive(points, matrix, i_start, i_end, j_start, j_mid);
        divide_conquer_recursive(points, matrix, i_start, i_end, j_mid, j_end);
    }
}

#[inline(always)]
pub fn euclidean_distance_2d(a: &[f64; 2], b: &[f64; 2]) -> f64 {
    euclidean_distance_fixed(a, b)
}

#[inline(always)]
pub fn euclidean_distance_3d(a: &[f64; 3], b: &[f64; 3]) -> f64 {
    euclidean_distance_fixed(a, b)
}

#[inline(always)]
pub fn euclidean_distance_4d(a: &[f64; 4], b: &[f64; 4]) -> f64 {
    euclidean_distance_fixed(a, b)
}

#[cfg(feature = "parallel")]
pub fn pdist_concurrent_f64(points: &Array2<f64>) -> Vec<f64> {
    use std::sync::atomic::{AtomicU64, Ordering};

    let n = points.nrows();
    if n <= 4 {
        return pdist_small_matrix_f64(points);
    }

    let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n);
    matrix.resize(n * n, 0.0f64);

    const WORK_CHUNK_SIZE: usize = 32;

    let work_items: Vec<Vec<(usize, usize)>> = (0..n)
        .collect::<Vec<_>>()
        .chunks(WORK_CHUNK_SIZE)
        .map(|chunk| {
            chunk
                .iter()
                .flat_map(|&i| ((i + 1)..n).map(move |j| (i, j)))
                .collect()
        })
        .collect();

    let work_counter = AtomicU64::new(0);

    for chunk in work_items {
        for (i, j) in chunk {
            let distance = unsafe {
                euclidean_distance_f64_specialized(
                    points.row(i).as_slice().unwrap_or(&[]),
                    points.row(j).as_slice().unwrap_or(&[]),
                )
            };

            let matrix_ptr = matrix.as_mut_slice().as_mut_ptr();
            unsafe {
                let idx_ij = i * n + j;
                let idx_ji = j * n + i;

                std::ptr::write_volatile(matrix_ptr.add(idx_ij), distance);
                std::ptr::write_volatile(matrix_ptr.add(idx_ji), distance);
            }
        }

        work_counter.fetch_add(1, Ordering::Relaxed);
    }

    let matrix_slice = matrix.as_mut_slice();
    for i in 0..n {
        matrix_slice[i * n + i] = 0.0;
    }

    matrix.as_slice().to_vec()
}

#[cfg(feature = "parallel")]
pub fn pdist_lockfree_f64(points: &Array2<f64>) -> Vec<f64> {
    use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};

    let n = points.nrows();
    if n <= 4 {
        return pdist_small_matrix_f64(points);
    }

    let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n + 64);
    matrix.resize(n * n, 0.0f64);

    let num_cpus = std::thread::available_parallelism()
        .map(|p| p.get())
        .unwrap_or(1);
    let total_pairs = n * (n - 1) / 2;
    let work_per_cpu = total_pairs.div_ceil(num_cpus);

    let work_queues: Vec<Vec<(usize, usize)>> = (0..num_cpus)
        .map(|cpu_id| {
            let start_idx = cpu_id * work_per_cpu;
            let end_idx = ((cpu_id + 1) * work_per_cpu).min(total_pairs);

            let mut local_work = Vec::with_capacity(work_per_cpu);
            let mut global_idx = 0;

            for i in 0..n {
                for j in (i + 1)..n {
                    if global_idx >= start_idx && global_idx < end_idx {
                        local_work.push((i, j));
                    }
                    global_idx += 1;
                    if global_idx >= end_idx {
                        break;
                    }
                }
                if global_idx >= end_idx {
                    break;
                }
            }

            local_work
        })
        .collect();

    let steal_attempts = AtomicU64::new(0);
    let completed_work = AtomicUsize::new(0);

    for (cpu_id, work_queue) in work_queues.into_iter().enumerate() {
        let mut backoff_delay = 1;
        const MAX_BACKOFF: u64 = 1024;

        for (i, j) in work_queue {
            let distance = unsafe {
                euclidean_distance_f64_specialized(
                    points.row(i).as_slice().unwrap_or(&[]),
                    points.row(j).as_slice().unwrap_or(&[]),
                )
            };

            let matrix_ptr = matrix.as_mut_slice().as_mut_ptr();
            unsafe {
                let idx_ij = i * n + j;
                let idx_ji = j * n + i;

                std::ptr::write_volatile(matrix_ptr.add(idx_ij), distance);
                std::ptr::write_volatile(matrix_ptr.add(idx_ji), distance);
            }

            completed_work.fetch_add(1, Ordering::Relaxed);

            if steal_attempts.load(Ordering::Relaxed) > (cpu_id as u64 * 100) {
                if backoff_delay < MAX_BACKOFF {
                    std::thread::sleep(std::time::Duration::from_nanos(backoff_delay));
                    backoff_delay *= 2;
                } else {
                    backoff_delay = 1;
                }
            }
        }
    }

    let matrix_slice = matrix.as_mut_slice();
    for i in 0..n {
        matrix_slice[i * n + i] = 0.0;
    }

    matrix.as_slice().to_vec()
}

#[cfg(feature = "parallel")]
pub fn pdist_adaptive_lockfree_f64(points: &Array2<f64>, precision_threshold: f64) -> Vec<f64> {
    let n = points.nrows();
    if n <= 32 {
        return pdist_memory_aware_f64(points);
    }

    if n < 1000 {
        return pdist_lockfree_f64(points);
    }

    let cache_block_size = if n > 10000 { 256 } else { 128 };
    let num_blocks = n.div_ceil(cache_block_size);

    let block_pairs: Vec<(usize, usize)> = (0..num_blocks)
        .flat_map(|i| (i..num_blocks).map(move |j| (i, j)))
        .collect();

    let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n + 128);
    matrix.resize(n * n, 0.0f64);

    for (block_i, block_j) in block_pairs {
        let i_start = block_i * cache_block_size;
        let i_end = (i_start + cache_block_size).min(n);
        let j_start = block_j * cache_block_size;
        let j_end = (j_start + cache_block_size).min(n);

        for i in i_start..i_end {
            for j in j_start.max(i)..j_end {
                let distance = if i == j {
                    0.0f64
                } else {
                    let row_i = points.row(i);
                    let row_j = points.row(j);
                    let dim = row_i.len();

                    if dim >= 10 && precision_threshold > 0.01 {
                        euclidean_distance_f32_fast(
                            &row_i.iter().map(|&x| x as f32).collect::<Vec<_>>(),
                            &row_j.iter().map(|&x| x as f32).collect::<Vec<_>>(),
                        ) as f64
                    } else {
                        unsafe {
                            euclidean_distance_f64_specialized(
                                row_i.as_slice().unwrap_or(&[]),
                                row_j.as_slice().unwrap_or(&[]),
                            )
                        }
                    }
                };

                let matrix_ptr = matrix.as_mut_slice().as_mut_ptr();
                unsafe {
                    let idx_ij = i * n + j;
                    let idx_ji = j * n + i;
                    std::ptr::write_volatile(matrix_ptr.add(idx_ij), distance);
                    std::ptr::write_volatile(matrix_ptr.add(idx_ji), distance);
                }
            }
        }
    }

    matrix.as_slice().to_vec()
}

#[allow(dead_code)]
pub fn cdist<T, F>(x_a: &Array2<T>, xb: &Array2<T>, metric: F) -> SpatialResult<Array2<T>>
where
    T: Float + std::fmt::Debug,
    F: Fn(&[T], &[T]) -> T,
{
    let n_a = x_a.nrows();
    let n_b = xb.nrows();

    if x_a.ncols() != xb.ncols() {
        return Err(SpatialError::DimensionError(format!(
            "Dimension mismatch: x_a has {} columns, xb has {} columns",
            x_a.ncols(),
            xb.ncols()
        )));
    }

    let mut result = Array2::zeros((n_a, n_b));

    for i in 0..n_a {
        let row_i = x_a.row(i).to_vec();

        for j in 0..n_b {
            let row_j = xb.row(j).to_vec();
            result[(i, j)] = metric(&row_i, &row_j);
        }
    }

    Ok(result)
}
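
// Usage sketch (illustrative values): `cdist` computes all pairwise distances
// between two point sets; a 2 x 2 set against a 1 x 2 set yields a 2 x 1 result
// with entries 8.0 and 5.0.
#[allow(dead_code)]
fn cdist_example() -> SpatialResult<Array2<f64>> {
    let xa = Array2::from_shape_vec((2, 2), vec![0.0, 0.0, 3.0, 4.0])
        .expect("2 x 2 shape matches the 4 supplied values");
    let xb = Array2::from_shape_vec((1, 2), vec![0.0, 8.0])
        .expect("1 x 2 shape matches the 2 supplied values");
    cdist(&xa, &xb, euclidean::<f64>)
}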

pub fn cdist_optimized<T, F>(x_a: &Array2<T>, xb: &Array2<T>, metric: F) -> SpatialResult<Array2<T>>
where
    T: Float + std::fmt::Debug,
    F: Fn(ArrayView1<T>, ArrayView1<T>) -> T,
{
    let n_a = x_a.nrows();
    let n_b = xb.nrows();

    if x_a.ncols() != xb.ncols() {
        return Err(SpatialError::DimensionError(format!(
            "Dimension mismatch: x_a has {} columns, xb has {} columns",
            x_a.ncols(),
            xb.ncols()
        )));
    }

    let mut result = Array2::zeros((n_a, n_b));

    for i in 0..n_a {
        let row_i = x_a.row(i);

        for j in 0..n_b {
            let row_j = xb.row(j);
            result[(i, j)] = metric(row_i, row_j);
        }
    }

    Ok(result)
}

#[allow(dead_code)]
pub fn is_valid_condensed_distance_matrix<T: Float>(distances: &[T]) -> bool {
    let n = (1.0 + (1.0 + 8.0 * distances.len() as f64).sqrt()) / 2.0;
    if n.fract() != 0.0 {
        return false;
    }

    for &dist in distances {
        if dist < T::zero() {
            return false;
        }
    }

    true
}

#[allow(dead_code)]
pub fn squareform<T: Float>(distances: &[T]) -> SpatialResult<Array2<T>> {
    if !is_valid_condensed_distance_matrix(distances) {
        return Err(SpatialError::ValueError(
            "Invalid condensed distance matrix".to_string(),
        ));
    }

    let n = (1.0 + (1.0 + 8.0 * distances.len() as f64).sqrt()) / 2.0;
    let n = n as usize;

    let mut result = Array2::zeros((n, n));

    let mut k = 0;
    for i in 0..n - 1 {
        for j in i + 1..n {
            result[(i, j)] = distances[k];
            result[(j, i)] = distances[k];
            k += 1;
        }
    }

    Ok(result)
}

#[allow(dead_code)]
pub fn squareform_to_condensed<T: Float>(distances: &Array2<T>) -> SpatialResult<Vec<T>> {
    let n = distances.nrows();
    if n != distances.ncols() {
        return Err(SpatialError::ValueError(
            "Distance matrix must be square".to_string(),
        ));
    }

    for i in 0..n {
        for j in i + 1..n {
            if (distances[(i, j)] - distances[(j, i)]).abs() > T::epsilon() {
                return Err(SpatialError::ValueError(
                    "Distance matrix must be symmetric".to_string(),
                ));
            }
        }
    }

    let size = n * (n - 1) / 2;
    let mut result = Vec::with_capacity(size);

    for i in 0..n - 1 {
        for j in i + 1..n {
            result.push(distances[(i, j)]);
        }
    }

    Ok(result)
}
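
// Illustrative sketch of the round trip between the condensed and square forms:
// three pairwise distances expand to a 3 x 3 symmetric matrix and collapse back
// unchanged.
#[allow(dead_code)]
fn squareform_roundtrip_example() -> SpatialResult<()> {
    let condensed = vec![5.0f64, 8.0, 5.0];
    let square = squareform(&condensed)?;
    debug_assert_eq!(square.nrows(), 3);
    debug_assert_eq!(square[(0, 1)], 5.0);
    let back = squareform_to_condensed(&square)?;
    debug_assert_eq!(back, condensed);
    Ok(())
}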

#[allow(dead_code)]
pub fn dice<T: Float>(point1: &[bool], point2: &[bool]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut n_true_true = 0;
    let mut n_true_false = 0;
    let mut n_false_true = 0;

    for i in 0..point1.len() {
        if point1[i] && point2[i] {
            n_true_true += 1;
        } else if point1[i] && !point2[i] {
            n_true_false += 1;
        } else if !point1[i] && point2[i] {
            n_false_true += 1;
        }
    }

    let num = T::from(n_true_false + n_false_true).expect("Operation failed");
    let denom = T::from(2 * n_true_true + n_true_false + n_false_true).expect("Operation failed");

    if denom > T::zero() {
        num / denom
    } else {
        T::zero()
    }
}
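
// Illustrative sketch for the boolean metrics: one shared "true" and one
// mismatch on each side give a Dice dissimilarity of (1 + 1) / (2 * 1 + 1 + 1) = 0.5.
#[allow(dead_code)]
fn dice_example() {
    let d: f64 = dice(&[true, true, false], &[true, false, true]);
    debug_assert!((d - 0.5).abs() < 1e-12);
}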

#[allow(dead_code)]
pub fn kulsinski<T: Float>(point1: &[bool], point2: &[bool]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut n_true_true = 0;
    let mut n_true_false = 0;
    let mut n_false_true = 0;
    let n = point1.len();

    for i in 0..n {
        if point1[i] && point2[i] {
            n_true_true += 1;
        } else if point1[i] && !point2[i] {
            n_true_false += 1;
        } else if !point1[i] && point2[i] {
            n_false_true += 1;
        }
    }

    // Reordered to avoid unsigned underflow when n_true_true exceeds the mismatch count.
    let num = T::from(n_true_false + n_false_true + n - n_true_true).expect("Operation failed");
    let denom = T::from(n_true_false + n_false_true + n).expect("Operation failed");

    if denom > T::zero() {
        num / denom
    } else {
        T::zero()
    }
}

#[allow(dead_code)]
pub fn rogerstanimoto<T: Float>(point1: &[bool], point2: &[bool]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut n_true_true = 0;
    let mut n_true_false = 0;
    let mut n_false_true = 0;
    let mut n_false_false = 0;

    for i in 0..point1.len() {
        if point1[i] && point2[i] {
            n_true_true += 1;
        } else if point1[i] && !point2[i] {
            n_true_false += 1;
        } else if !point1[i] && point2[i] {
            n_false_true += 1;
        } else {
            n_false_false += 1;
        }
    }

    let r = n_true_false + n_false_true;

    let num = T::from(2 * r).expect("Operation failed");
    let denom = T::from(n_true_true + n_false_false + 2 * r).expect("Operation failed");

    if denom > T::zero() {
        num / denom
    } else {
        T::zero()
    }
}

#[allow(dead_code)]
pub fn russellrao<T: Float>(point1: &[bool], point2: &[bool]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut n_true_true = 0;
    let n = point1.len();

    for i in 0..n {
        if point1[i] && point2[i] {
            n_true_true += 1;
        }
    }

    let num = T::from(n - n_true_true).expect("Operation failed");
    let denom = T::from(n).expect("Operation failed");

    if denom > T::zero() {
        num / denom
    } else {
        T::zero()
    }
}

#[allow(dead_code)]
pub fn sokalmichener<T: Float>(point1: &[bool], point2: &[bool]) -> T {
    rogerstanimoto(point1, point2)
}

#[allow(dead_code)]
pub fn sokalsneath<T: Float>(point1: &[bool], point2: &[bool]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut n_true_true = 0;
    let mut n_true_false = 0;
    let mut n_false_true = 0;

    for i in 0..point1.len() {
        if point1[i] && point2[i] {
            n_true_true += 1;
        } else if point1[i] && !point2[i] {
            n_true_false += 1;
        } else if !point1[i] && point2[i] {
            n_false_true += 1;
        }
    }

    let r = n_true_false + n_false_true;

    let num = T::from(2 * r).expect("Operation failed");
    let denom = T::from(n_true_true + 2 * r).expect("Operation failed");

    if denom > T::zero() {
        num / denom
    } else {
        T::zero()
    }
}

#[allow(dead_code)]
pub fn yule<T: Float>(point1: &[bool], point2: &[bool]) -> T {
    if point1.len() != point2.len() {
        return T::nan();
    }

    let mut n_true_true = 0;
    let mut n_true_false = 0;
    let mut n_false_true = 0;
    let mut n_false_false = 0;

    for i in 0..point1.len() {
        if point1[i] && point2[i] {
            n_true_true += 1;
        } else if point1[i] && !point2[i] {
            n_true_false += 1;
        } else if !point1[i] && point2[i] {
            n_false_true += 1;
        } else {
            n_false_false += 1;
        }
    }

    let num = T::from(2 * n_true_false * n_false_true).expect("Operation failed");
    let denom = T::from(n_true_true * n_false_false + n_true_false * n_false_true)
        .expect("Operation failed");

    if denom > T::zero() {
        num / denom
    } else {
        T::zero()
    }
}

#[cfg(test)]
#[path = "distance_tests.rs"]
mod tests;