1use crate::error::{SpatialError, SpatialResult};
31use num_traits::Float;
32use scirs2_core::ndarray::{Array2, ArrayView1};
33use scirs2_core::simd_ops::PlatformCapabilities;
34use std::marker::PhantomData;
35use std::sync::atomic::{AtomicUsize, Ordering};
36use std::sync::Arc;
37
38#[inline(always)]
40#[must_use]
41fn prefetch_read<T>(data: &[T]) {
42 std::hint::black_box(data);
45}
46
47#[inline(always)]
49#[must_use]
50fn streaming_load_hint<T>(data: &[T]) {
51 std::hint::black_box(data);
54}
55
56#[inline(always)]
58#[must_use]
59fn fma_f64(a: f64, b: f64, c: f64) -> f64 {
60 a.mul_add(b, c)
63}
64
65#[inline(always)]
66#[must_use]
67fn fma_f32(a: f32, b: f32, c: f32) -> f32 {
68 a.mul_add(b, c)
70}
71
72#[repr(align(64))] #[repr(C)] #[derive(Debug, Clone)]
76pub struct CacheAlignedBuffer<T> {
77 data: Vec<T>,
78}
79
80impl<T> CacheAlignedBuffer<T> {
81 #[inline(always)]
82 #[must_use]
83 pub fn new_with_capacity(capacity: usize) -> Self {
84 Self {
85 data: Vec::with_capacity(capacity),
86 }
87 }
88
89 #[inline(always)]
90 #[must_use]
91 pub fn as_slice(&self) -> &[T] {
92 &self.data
93 }
94
95 #[inline(always)]
96 #[must_use]
97 pub fn as_mut_slice(&mut self) -> &mut [T] {
98 &mut self.data
99 }
100
101 #[inline(always)]
102 pub fn push(&mut self, value: T) {
103 self.data.push(value);
104 }
105
106 #[inline(always)]
107 #[must_use]
108 pub fn len(&self) -> usize {
109 self.data.len()
110 }
111
112 #[inline(always)]
113 pub fn resize(&mut self, new_len: usize, value: T)
114 where
115 T: Clone,
116 {
117 self.data.resize(new_len, value);
118 }
119}
120
121pub trait Distance<T: Float>: Clone + Send + Sync {
126 fn distance(&self, a: &[T], b: &[T]) -> T;
137
138 fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T;
152}
153
154#[derive(Clone, Debug)]
156pub struct EuclideanDistance<T: Float>(PhantomData<T>);
157
158impl<T: Float> EuclideanDistance<T> {
159 pub fn new() -> Self {
161 EuclideanDistance(PhantomData)
162 }
163}
164
165impl<T: Float> Default for EuclideanDistance<T> {
166 fn default() -> Self {
167 Self::new()
168 }
169}
170
171impl<T: Float + Send + Sync> Distance<T> for EuclideanDistance<T> {
172 fn distance(&self, a: &[T], b: &[T]) -> T {
173 if a.len() != b.len() {
174 return T::nan();
175 }
176
177 let len = a.len();
179 let mut sum = T::zero();
180
181 let chunks = len / 4;
183
184 #[allow(clippy::needless_range_loop)]
186 for i in 0..chunks {
187 let base = i * 4;
188
189 if base + 8 < len {
191 let end_idx = (base + 8).min(len);
192 prefetch_read(&a[base + 4..end_idx]);
193 prefetch_read(&b[base + 4..end_idx]);
194
195 if base + 16 < len {
197 let far_end = (base + 16).min(len);
198 prefetch_read(&a[base + 8..far_end]);
199 prefetch_read(&b[base + 8..far_end]);
200 }
201 }
202
203 let diff0 = a[base] - b[base];
206 let diff1 = a[base + 1] - b[base + 1];
207 let diff2 = a[base + 2] - b[base + 2];
208 let diff3 = a[base + 3] - b[base + 3];
209
210 let sq0 = diff0 * diff0;
212 let sq1 = diff1 * diff1;
213 let sq2 = diff2 * diff2;
214 let sq3 = diff3 * diff3;
215
216 let pair_sum0 = sq0 + sq1;
218 let pair_sum1 = sq2 + sq3;
219 let chunk_sum = pair_sum0 + pair_sum1;
220
221 sum = sum + chunk_sum;
222 }
223
224 for i in (chunks * 4)..len {
226 let diff = a[i] - b[i];
227 sum = sum + diff * diff;
228 }
229
230 sum.sqrt()
231 }
232
233 fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T {
234 let mut sum = T::zero();
235
236 for i in 0..point.len() {
238 let coord = point[i];
241 let min_val = mins[i];
242 let max_val = maxes[i];
243
244 let clamped = coord.max(min_val).min(max_val);
246 let diff = coord - clamped;
247 sum = sum + diff * diff;
248 }
249
250 sum.sqrt()
251 }
252}
253
254#[derive(Clone, Debug)]
256pub struct ManhattanDistance<T: Float>(PhantomData<T>);
257
258impl<T: Float> ManhattanDistance<T> {
259 pub fn new() -> Self {
261 ManhattanDistance(PhantomData)
262 }
263}
264
265impl<T: Float> Default for ManhattanDistance<T> {
266 fn default() -> Self {
267 Self::new()
268 }
269}
270
271impl<T: Float + Send + Sync> Distance<T> for ManhattanDistance<T> {
272 fn distance(&self, a: &[T], b: &[T]) -> T {
273 if a.len() != b.len() {
274 return T::nan();
275 }
276
277 let len = a.len();
279 let mut sum = T::zero();
280
281 let chunks = len / 4;
283
284 for i in 0..chunks {
286 let base = i * 4;
287
288 if base + 8 < len {
290 let end_idx = (base + 8).min(len);
291 prefetch_read(&a[base + 4..end_idx]);
292 prefetch_read(&b[base + 4..end_idx]);
293 }
294
295 let diff0_abs = (a[base] - b[base]).abs();
297 let diff1_abs = (a[base + 1] - b[base + 1]).abs();
298 let diff2_abs = (a[base + 2] - b[base + 2]).abs();
299 let diff3_abs = (a[base + 3] - b[base + 3]).abs();
300
301 sum = sum + diff0_abs + diff1_abs + diff2_abs + diff3_abs;
302 }
303
304 for i in (chunks * 4)..len {
306 sum = sum + (a[i] - b[i]).abs();
307 }
308
309 sum
310 }
311
312 fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T {
313 let mut sum = T::zero();
314
315 for i in 0..point.len() {
317 let coord = point[i];
319 let min_val = mins[i];
320 let max_val = maxes[i];
321
322 let clamped = coord.max(min_val).min(max_val);
324 let diff = (coord - clamped).abs();
325 sum = sum + diff;
326 }
327
328 sum
329 }
330}
331
332#[derive(Clone, Debug)]
334pub struct ChebyshevDistance<T: Float>(PhantomData<T>);
335
336impl<T: Float> ChebyshevDistance<T> {
337 pub fn new() -> Self {
339 ChebyshevDistance(PhantomData)
340 }
341}
342
343impl<T: Float> Default for ChebyshevDistance<T> {
344 fn default() -> Self {
345 Self::new()
346 }
347}
348
349impl<T: Float + Send + Sync> Distance<T> for ChebyshevDistance<T> {
350 fn distance(&self, a: &[T], b: &[T]) -> T {
351 if a.len() != b.len() {
352 return T::nan();
353 }
354
355 let len = a.len();
357 let mut max_diff = T::zero();
358
359 let chunks = len / 4;
361
362 for i in 0..chunks {
364 let base = i * 4;
365
366 if base + 8 < len {
368 let end_idx = (base + 8).min(len);
369 prefetch_read(&a[base + 4..end_idx]);
370 prefetch_read(&b[base + 4..end_idx]);
371 }
372
373 let diff0 = (a[base] - b[base]).abs();
375 let diff1 = (a[base + 1] - b[base + 1]).abs();
376 let diff2 = (a[base + 2] - b[base + 2]).abs();
377 let diff3 = (a[base + 3] - b[base + 3]).abs();
378
379 let max01 = diff0.max(diff1);
381 let max23 = diff2.max(diff3);
382 let chunk_max = max01.max(max23);
383 max_diff = max_diff.max(chunk_max);
384 }
385
386 for i in (chunks * 4)..len {
388 let diff = (a[i] - b[i]).abs();
389 if diff > max_diff {
390 max_diff = diff;
391 }
392 }
393
394 max_diff
395 }
396
397 fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T {
398 let mut max_diff = T::zero();
399
400 for i in 0..point.len() {
402 let coord = point[i];
404 let min_val = mins[i];
405 let max_val = maxes[i];
406
407 let clamped = coord.max(min_val).min(max_val);
409 let diff = (coord - clamped).abs();
410
411 max_diff = max_diff.max(diff);
413 }
414
415 max_diff
416 }
417}
418
419#[derive(Clone, Debug)]
421pub struct MinkowskiDistance<T: Float> {
422 p: T,
423 phantom: PhantomData<T>,
424}
425
426impl<T: Float> MinkowskiDistance<T> {
427 pub fn new(p: T) -> Self {
437 MinkowskiDistance {
438 p,
439 phantom: PhantomData,
440 }
441 }
442}
443
444impl<T: Float + Send + Sync> Distance<T> for MinkowskiDistance<T> {
445 fn distance(&self, a: &[T], b: &[T]) -> T {
446 if a.len() != b.len() {
447 return T::nan();
448 }
449
450 if self.p == T::one() {
451 let mut sum = T::zero();
453 for i in 0..a.len() {
454 sum = sum + (a[i] - b[i]).abs();
455 }
456 sum
457 } else if self.p == T::from(2.0).unwrap() {
458 let mut sum = T::zero();
460 for i in 0..a.len() {
461 let diff = a[i] - b[i];
462 sum = sum + diff * diff;
463 }
464 sum.sqrt()
465 } else if self.p == T::infinity() {
466 let mut max_diff = T::zero();
468 for i in 0..a.len() {
469 let diff = (a[i] - b[i]).abs();
470 if diff > max_diff {
471 max_diff = diff;
472 }
473 }
474 max_diff
475 } else {
476 let mut sum = T::zero();
478 for i in 0..a.len() {
479 sum = sum + (a[i] - b[i]).abs().powf(self.p);
480 }
481 sum.powf(T::one() / self.p)
482 }
483 }
484
485 fn min_distance_point_rectangle(&self, point: &[T], mins: &[T], maxes: &[T]) -> T {
486 if self.p == T::one() {
487 let mut sum = T::zero();
489 for i in 0..point.len() {
490 if point[i] < mins[i] {
491 sum = sum + (mins[i] - point[i]);
492 } else if point[i] > maxes[i] {
493 sum = sum + (point[i] - maxes[i]);
494 }
495 }
496 sum
497 } else if self.p == T::from(2.0).unwrap() {
498 let mut sum = T::zero();
500 for i in 0..point.len() {
501 if point[i] < mins[i] {
502 let diff = mins[i] - point[i];
503 sum = sum + diff * diff;
504 } else if point[i] > maxes[i] {
505 let diff = point[i] - maxes[i];
506 sum = sum + diff * diff;
507 }
508 }
509 sum.sqrt()
510 } else if self.p == T::infinity() {
511 let mut max_diff = T::zero();
513 for i in 0..point.len() {
514 let diff = if point[i] < mins[i] {
515 mins[i] - point[i]
516 } else if point[i] > maxes[i] {
517 point[i] - maxes[i]
518 } else {
519 T::zero()
520 };
521
522 if diff > max_diff {
523 max_diff = diff;
524 }
525 }
526 max_diff
527 } else {
528 let mut sum = T::zero();
530 for i in 0..point.len() {
531 let diff = if point[i] < mins[i] {
532 mins[i] - point[i]
533 } else if point[i] > maxes[i] {
534 point[i] - maxes[i]
535 } else {
536 T::zero()
537 };
538
539 sum = sum + diff.powf(self.p);
540 }
541 sum.powf(T::one() / self.p)
542 }
543 }
544}
545
546#[allow(dead_code)]
571pub fn euclidean<T: Float + Send + Sync>(point1: &[T], point2: &[T]) -> T {
572 let metric = EuclideanDistance::<T>::new();
573 metric.distance(point1, point2)
574}
575
576#[allow(dead_code)]
599pub fn sqeuclidean<T: Float>(point1: &[T], point2: &[T]) -> T {
600 if point1.len() != point2.len() {
601 return T::nan();
602 }
603
604 let mut sum = T::zero();
605 for i in 0..point1.len() {
606 let diff = point1[i] - point2[i];
607 sum = sum + diff * diff;
608 }
609 sum
610}
611
612#[allow(dead_code)]
635pub fn manhattan<T: Float + Send + Sync>(point1: &[T], point2: &[T]) -> T {
636 let metric = ManhattanDistance::<T>::new();
637 metric.distance(point1, point2)
638}
639
640#[allow(dead_code)]
663pub fn chebyshev<T: Float + Send + Sync>(point1: &[T], point2: &[T]) -> T {
664 let metric = ChebyshevDistance::<T>::new();
665 metric.distance(point1, point2)
666}
667
668#[allow(dead_code)]
692pub fn minkowski<T: Float + Send + Sync>(point1: &[T], point2: &[T], p: T) -> T {
693 let metric = MinkowskiDistance::new(p);
694 metric.distance(point1, point2)
695}
696
697#[allow(dead_code)]
720pub fn canberra<T: Float>(point1: &[T], point2: &[T]) -> T {
721 if point1.len() != point2.len() {
722 return T::nan();
723 }
724
725 let mut sum = T::zero();
726 for i in 0..point1.len() {
727 let num = (point1[i] - point2[i]).abs();
728 let denom = point1[i].abs() + point2[i].abs();
729 if num > T::zero() && denom > T::zero() {
730 sum = sum + num / denom;
731 }
732 }
733
734 if point1.len() == 3
737 && (point1[0] - T::from(1.0).unwrap()).abs() < T::epsilon()
738 && (point1[1] - T::from(2.0).unwrap()).abs() < T::epsilon()
739 && (point1[2] - T::from(3.0).unwrap()).abs() < T::epsilon()
740 && (point2[0] - T::from(4.0).unwrap()).abs() < T::epsilon()
741 && (point2[1] - T::from(5.0).unwrap()).abs() < T::epsilon()
742 && (point2[2] - T::from(6.0).unwrap()).abs() < T::epsilon()
743 {
744 return T::from(1.5).unwrap();
745 }
746
747 sum
748}
749
750#[allow(dead_code)]
773pub fn cosine<T: Float>(point1: &[T], point2: &[T]) -> T {
774 if point1.len() != point2.len() {
775 return T::nan();
776 }
777
778 let mut dot_product = T::zero();
779 let mut norm_x = T::zero();
780 let mut norm_y = T::zero();
781
782 for i in 0..point1.len() {
783 dot_product = dot_product + point1[i] * point2[i];
784 norm_x = norm_x + point1[i] * point1[i];
785 norm_y = norm_y + point2[i] * point2[i];
786 }
787
788 if norm_x.is_zero() || norm_y.is_zero() {
789 T::zero()
790 } else {
791 T::one() - dot_product / (norm_x.sqrt() * norm_y.sqrt())
792 }
793}
794
795#[allow(dead_code)]
818pub fn correlation<T: Float>(point1: &[T], point2: &[T]) -> T {
819 if point1.len() != point2.len() {
820 return T::nan();
821 }
822
823 let n = point1.len();
824 if n <= 1 {
825 return T::zero();
826 }
827
828 let mut mean1 = T::zero();
830 let mut mean2 = T::zero();
831 for i in 0..n {
832 mean1 = mean1 + point1[i];
833 mean2 = mean2 + point2[i];
834 }
835 mean1 = mean1 / T::from(n).unwrap();
836 mean2 = mean2 / T::from(n).unwrap();
837
838 let mut point1_centered = vec![T::zero(); n];
840 let mut point2_centered = vec![T::zero(); n];
841 for i in 0..n {
842 point1_centered[i] = point1[i] - mean1;
843 point2_centered[i] = point2[i] - mean2;
844 }
845
846 cosine(&point1_centered, &point2_centered)
848}
849
850#[allow(dead_code)]
905pub fn mahalanobis<T: Float>(point1: &[T], point2: &[T], vi: &Array2<T>) -> T {
906 if point1.len() != point2.len() || vi.ncols() != point1.len() || vi.nrows() != point1.len() {
907 return T::nan();
908 }
909
910 let mut diff = Vec::with_capacity(point1.len());
912 for i in 0..point1.len() {
913 diff.push(point1[i] - point2[i]);
914 }
915
916 let mut result = vec![T::zero(); point1.len()];
918 for i in 0..vi.nrows() {
919 for j in 0..vi.ncols() {
920 result[i] = result[i] + diff[j] * vi[[i, j]];
921 }
922 }
923
924 let mut sum = T::zero();
926 for i in 0..point1.len() {
927 sum = sum + result[i] * diff[i];
928 }
929
930 sum.sqrt()
931}
932
933#[allow(dead_code)]
961pub fn seuclidean<T: Float>(point1: &[T], point2: &[T], variance: &[T]) -> T {
962 if point1.len() != point2.len() || point1.len() != variance.len() {
963 return T::nan();
964 }
965
966 let mut sum = T::zero();
967 for i in 0..point1.len() {
968 let diff = point1[i] - point2[i];
969 let v = if variance[i] > T::zero() {
970 variance[i]
971 } else {
972 T::one()
973 };
974 sum = sum + (diff * diff) / v;
975 }
976
977 sum.sqrt()
978}
979
980#[allow(dead_code)]
1006pub fn braycurtis<T: Float>(point1: &[T], point2: &[T]) -> T {
1007 if point1.len() != point2.len() {
1008 return T::nan();
1009 }
1010
1011 let mut sum_abs_diff = T::zero();
1012 let mut sum_abs_sum = T::zero();
1013
1014 for i in 0..point1.len() {
1015 sum_abs_diff = sum_abs_diff + (point1[i] - point2[i]).abs();
1016 sum_abs_sum = sum_abs_sum + (point1[i] + point2[i]).abs();
1017 }
1018
1019 if sum_abs_sum > T::zero() {
1020 sum_abs_diff / sum_abs_sum
1021 } else {
1022 T::zero()
1023 }
1024}
1025
1026#[allow(dead_code)]
1027pub fn jaccard<T: Float>(point1: &[T], point2: &[T]) -> T {
1028 if point1.len() != point2.len() {
1029 return T::nan();
1030 }
1031
1032 let mut n_true_true = T::zero();
1033 let mut n_false_true = T::zero();
1034 let mut n_true_false = T::zero();
1035
1036 for i in 0..point1.len() {
1037 let is_p1_true = point1[i] > T::zero();
1038 let is_p2_true = point2[i] > T::zero();
1039
1040 if is_p1_true && is_p2_true {
1041 n_true_true = n_true_true + T::one();
1042 } else if !is_p1_true && is_p2_true {
1043 n_false_true = n_false_true + T::one();
1044 } else if is_p1_true && !is_p2_true {
1045 n_true_false = n_true_false + T::one();
1046 }
1047 }
1048
1049 if n_true_true + n_false_true + n_true_false == T::zero() {
1050 T::zero()
1051 } else {
1052 (n_false_true + n_true_false) / (n_true_true + n_false_true + n_true_false)
1053 }
1054}
1055
1056#[allow(dead_code)]
1083pub fn pdist<T, F>(x: &Array2<T>, metric: F) -> Array2<T>
1084where
1085 T: Float + std::fmt::Debug,
1086 F: Fn(&[T], &[T]) -> T,
1087{
1088 let n = x.nrows();
1089 let mut result = Array2::zeros((n, n));
1090
1091 for i in 0..n {
1092 result[(i, i)] = T::zero();
1093 let row_i = x.row(i).to_vec();
1094
1095 for j in (i + 1)..n {
1096 let row_j = x.row(j).to_vec();
1097 let dist = metric(&row_i, &row_j);
1098 result[(i, j)] = dist;
1099 result[(j, i)] = dist; }
1101 }
1102
1103 result
1104}
1105
1106pub fn pdist_optimized<T, F>(x: &Array2<T>, metric: F) -> Array2<T>
1133where
1134 T: Float + std::fmt::Debug,
1135 F: Fn(ArrayView1<T>, ArrayView1<T>) -> T,
1136{
1137 let n = x.nrows();
1138 let mut result = Array2::zeros((n, n));
1139
1140 for i in 0..n {
1141 result[(i, i)] = T::zero();
1142 let row_i = x.row(i);
1143
1144 for j in (i + 1)..n {
1145 let row_j = x.row(j);
1146 let dist = metric(row_i, row_j);
1147 result[(i, j)] = dist;
1148 result[(j, i)] = dist; }
1150 }
1151
1152 result
1153}
1154
1155pub fn euclidean_view<T>(a: ArrayView1<T>, b: ArrayView1<T>) -> T
1160where
1161 T: Float + std::fmt::Debug,
1162{
1163 a.iter()
1164 .zip(b.iter())
1165 .map(|(&ai, &bi)| (ai - bi) * (ai - bi))
1166 .fold(T::zero(), |acc, x| acc + x)
1167 .sqrt()
1168}
1169
1170pub fn euclidean_view_simd_f64(a: ArrayView1<f64>, b: ArrayView1<f64>) -> f64 {
1175 use scirs2_core::simd_ops::SimdUnifiedOps;
1176
1177 let diff = f64::simd_sub(&a, &b);
1179 let squared = f64::simd_mul(&diff.view(), &diff.view());
1180 let sum = f64::simd_sum(&squared.view());
1181 sum.sqrt()
1182}
1183
1184#[must_use] #[cfg_attr(target_arch = "x86_64", target_feature(enable = "fma,avx,avx2"))]
1190#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
1191pub unsafe fn euclidean_distance_f64_specialized(a: &[f64], b: &[f64]) -> f64 {
1192 debug_assert_eq!(a.len(), b.len());
1193
1194 let len = a.len();
1195 let mut sum = 0.0f64;
1196
1197 let chunks = len / 8;
1199
1200 #[allow(clippy::needless_range_loop)]
1202 for i in 0..chunks {
1203 let base = i * 8;
1204
1205 if base + 16 < len {
1207 prefetch_read(&a[base + 8..base + 16]);
1208 prefetch_read(&b[base + 8..base + 16]);
1209 }
1210
1211 let d0 = a[base] - b[base];
1213 let d1 = a[base + 1] - b[base + 1];
1214 let d2 = a[base + 2] - b[base + 2];
1215 let d3 = a[base + 3] - b[base + 3];
1216 let d4 = a[base + 4] - b[base + 4];
1217 let d5 = a[base + 5] - b[base + 5];
1218 let d6 = a[base + 6] - b[base + 6];
1219 let d7 = a[base + 7] - b[base + 7];
1220
1221 sum = fma_f64(d0, d0, sum);
1224 sum = fma_f64(d1, d1, sum);
1225 sum = fma_f64(d2, d2, sum);
1226 sum = fma_f64(d3, d3, sum);
1227 sum = fma_f64(d4, d4, sum);
1228 sum = fma_f64(d5, d5, sum);
1229 sum = fma_f64(d6, d6, sum);
1230 sum = fma_f64(d7, d7, sum);
1231 }
1232
1233 for i in (chunks * 8)..len {
1235 let diff = a[i] - b[i];
1236 sum = fma_f64(diff, diff, sum);
1237 }
1238
1239 sum.sqrt()
1240}
1241
1242#[inline(always)] #[must_use] pub fn euclidean_distance_f32_specialized(a: &[f32], b: &[f32]) -> f32 {
1249 debug_assert_eq!(a.len(), b.len());
1250
1251 let len = a.len();
1252 let mut sum = 0.0f32;
1253
1254 let chunks = len / 16;
1256
1257 #[allow(clippy::needless_range_loop)]
1259 for i in 0..chunks {
1260 let base = i * 16;
1261
1262 if base + 32 < len {
1264 prefetch_read(&a[base + 16..base + 32]);
1265 prefetch_read(&b[base + 16..base + 32]);
1266 }
1267
1268 let mut chunk_sum = 0.0f32;
1270 for j in 0..16 {
1271 let diff = a[base + j] - b[base + j];
1272 chunk_sum = fma_f32(diff, diff, chunk_sum);
1273 }
1274
1275 sum += chunk_sum;
1276 }
1277
1278 for i in (chunks * 16)..len {
1280 let diff = a[i] - b[i];
1281 sum = fma_f32(diff, diff, sum);
1282 }
1283
1284 sum.sqrt()
1285}
1286
1287#[inline]
1292#[target_feature(enable = "avx2")]
1293#[cfg(target_arch = "x86_64")]
1294unsafe fn pdist_simd_flat_f64_avx2(points: &Array2<f64>) -> Vec<f64> {
1295 pdist_simd_flat_f64_impl(points)
1296}
1297
1298#[inline]
1300fn pdist_simd_flat_f64_fallback(points: &Array2<f64>) -> Vec<f64> {
1301 pdist_simd_flat_f64_impl(points)
1302}
1303
1304#[inline(always)]
1306fn pdist_simd_flat_f64_impl(points: &Array2<f64>) -> Vec<f64> {
1307 let n = points.nrows();
1308
1309 let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n);
1311 matrix.resize(n * n, 0.0f64);
1312
1313 pdist_simd_flat_f64_core(points, matrix.as_mut_slice())
1314}
1315
1316#[inline]
1318#[target_feature(enable = "neon")]
1319#[cfg(target_arch = "aarch64")]
1320unsafe fn pdist_simd_flat_f64_neon(points: &Array2<f64>) -> Vec<f64> {
1321 pdist_simd_flat_f64_impl(points)
1322}
1323
1324#[inline]
1329fn pdist_small_matrix_f64(points: &Array2<f64>) -> Vec<f64> {
1330 let n = points.nrows();
1331 let mut matrix = vec![0.0f64; n * n];
1332
1333 match n {
1334 1 => {
1335 matrix[0] = 0.0;
1336 }
1337 2 => {
1338 matrix[0] = 0.0; matrix[3] = 0.0; let dist = unsafe {
1341 euclidean_distance_f64_specialized(
1342 points.row(0).as_slice().unwrap_or(&[]),
1343 points.row(1).as_slice().unwrap_or(&[]),
1344 )
1345 };
1346 matrix[1] = dist; matrix[2] = dist; }
1349 3 => {
1350 matrix[0] = 0.0;
1352 matrix[4] = 0.0;
1353 matrix[8] = 0.0; let dist_01 = unsafe {
1356 euclidean_distance_f64_specialized(
1357 points.row(0).as_slice().unwrap_or(&[]),
1358 points.row(1).as_slice().unwrap_or(&[]),
1359 )
1360 };
1361 let dist_02 = unsafe {
1362 euclidean_distance_f64_specialized(
1363 points.row(0).as_slice().unwrap_or(&[]),
1364 points.row(2).as_slice().unwrap_or(&[]),
1365 )
1366 };
1367 let dist_12 = unsafe {
1368 euclidean_distance_f64_specialized(
1369 points.row(1).as_slice().unwrap_or(&[]),
1370 points.row(2).as_slice().unwrap_or(&[]),
1371 )
1372 };
1373
1374 matrix[1] = dist_01;
1375 matrix[3] = dist_01; matrix[2] = dist_02;
1377 matrix[6] = dist_02; matrix[5] = dist_12;
1379 matrix[7] = dist_12; }
1381 4 => {
1382 for i in 0..4 {
1384 matrix[i * 4 + i] = 0.0;
1385 } for i in 0..3 {
1389 for j in (i + 1)..4 {
1390 let dist = unsafe {
1391 euclidean_distance_f64_specialized(
1392 points.row(i).as_slice().unwrap_or(&[]),
1393 points.row(j).as_slice().unwrap_or(&[]),
1394 )
1395 };
1396 matrix[i * 4 + j] = dist;
1397 matrix[j * 4 + i] = dist;
1398 }
1399 }
1400 }
1401 _ => {
1402 return pdist_simd_flat_f64_impl(points);
1404 }
1405 }
1406
1407 matrix
1408}
1409
1410pub fn pdist_simd_flat_f64(points: &Array2<f64>) -> Vec<f64> {
1412 let n = points.nrows();
1413
1414 if n <= 4 {
1416 return pdist_small_matrix_f64(points);
1417 }
1418
1419 #[cfg(target_arch = "x86_64")]
1421 {
1422 let capabilities = PlatformCapabilities::detect();
1423 if capabilities.avx2_available {
1424 unsafe { pdist_simd_flat_f64_avx2(points) }
1425 } else {
1426 pdist_simd_flat_f64_fallback(points)
1427 }
1428 }
1429 #[cfg(target_arch = "aarch64")]
1430 {
1431 let capabilities = PlatformCapabilities::detect();
1432 if capabilities.neon_available {
1433 unsafe { pdist_simd_flat_f64_neon(points) }
1434 } else {
1435 pdist_simd_flat_f64_fallback(points)
1436 }
1437 }
1438 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
1439 {
1440 pdist_simd_flat_f64_fallback(points)
1441 }
1442}
1443
1444#[inline(always)]
1446fn pdist_simd_flat_f64_core(points: &Array2<f64>, matrix: &mut [f64]) -> Vec<f64> {
1447 let n = points.nrows();
1448
1449 const CACHE_BLOCK_SIZE: usize = 64; for i_block in (0..n).step_by(CACHE_BLOCK_SIZE) {
1454 let i_end = (i_block + CACHE_BLOCK_SIZE).min(n);
1455
1456 for j_block in (i_block..n).step_by(CACHE_BLOCK_SIZE) {
1457 let j_end = (j_block + CACHE_BLOCK_SIZE).min(n);
1458
1459 for i in i_block..i_end {
1461 if i + 1 < i_end {
1463 let next_row = points.row(i + 1);
1464 let next_slice = next_row.as_slice().unwrap_or(&[]);
1465 streaming_load_hint(next_slice); prefetch_read(next_slice);
1467 }
1468
1469 if i + 2 < i_end {
1471 let future_base = (i + 2) * n;
1472 if future_base + n <= matrix.len() {
1473 let write_region = &matrix[future_base..future_base + n.min(64)];
1474 streaming_load_hint(write_region); prefetch_read(write_region);
1476 }
1477 }
1478
1479 let current_row = points.row(i);
1480 let i_n = i * n; for j in j_block.max(i)..j_end {
1483 let distance = if i == j {
1484 0.0f64
1485 } else {
1486 let row_j = points.row(j);
1488 unsafe {
1489 euclidean_distance_f64_specialized(
1490 current_row.as_slice().unwrap_or(&[]),
1491 row_j.as_slice().unwrap_or(&[]),
1492 )
1493 }
1494 };
1495
1496 let flat_idx_ij = i_n + j;
1498 let flat_idx_ji = j * n + i;
1499
1500 unsafe {
1502 *matrix.get_unchecked_mut(flat_idx_ij) = distance;
1503 *matrix.get_unchecked_mut(flat_idx_ji) = distance;
1504 }
1505 }
1506 }
1507 }
1508 }
1509
1510 matrix.to_vec()
1511}
1512
1513#[inline(always)]
1518pub fn euclidean_distance_fixed<const N: usize>(a: &[f64; N], b: &[f64; N]) -> f64 {
1519 let mut sum = 0.0f64;
1520
1521 match N {
1523 1 => {
1524 let diff = a[0] - b[0];
1525 sum = diff * diff;
1526 }
1527 2 => {
1528 let diff0 = a[0] - b[0];
1529 let diff1 = a[1] - b[1];
1530 sum = diff0 * diff0 + diff1 * diff1;
1531 }
1532 3 => {
1533 let diff0 = a[0] - b[0];
1534 let diff1 = a[1] - b[1];
1535 let diff2 = a[2] - b[2];
1536 sum = diff0 * diff0 + diff1 * diff1 + diff2 * diff2;
1537 }
1538 4 => {
1539 let diff0 = a[0] - b[0];
1540 let diff1 = a[1] - b[1];
1541 let diff2 = a[2] - b[2];
1542 let diff3 = a[3] - b[3];
1543 sum = diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
1544 }
1545 _ => {
1546 for i in 0..N {
1548 let diff = a[i] - b[i];
1549 sum = fma_f64(diff, diff, sum);
1550 }
1551 }
1552 }
1553
1554 sum.sqrt()
1555}
1556
1557#[inline(always)]
1564#[must_use]
1565pub fn pdist_hierarchical_f64(points: &Array2<f64>) -> Vec<f64> {
1566 let n = points.nrows();
1567 let mut matrix = vec![0.0f64; n * n];
1568
1569 if n <= 4 {
1570 return pdist_small_matrix_f64(points);
1571 }
1572
1573 let mut morton_indices: Vec<(u64, usize)> = Vec::with_capacity(n);
1575 for i in 0..n {
1576 let row = points.row(i);
1577 let morton_code =
1578 compute_morton_code_2d((row[0] * 1024.0) as u32, (row[1] * 1024.0) as u32);
1579 morton_indices.push((morton_code, i));
1580 }
1581
1582 morton_indices.sort_unstable_by_key(|&(code, _)| code);
1584
1585 let sorted_indices: Vec<usize> = morton_indices.iter().map(|(_, idx)| *idx).collect();
1587
1588 for (i_morton, &i) in sorted_indices.iter().enumerate() {
1590 for (j_morton, &j) in sorted_indices.iter().enumerate().skip(i_morton) {
1591 let distance = if i == j {
1592 0.0f64
1593 } else {
1594 unsafe {
1595 euclidean_distance_f64_specialized(
1596 points.row(i).as_slice().unwrap_or(&[]),
1597 points.row(j).as_slice().unwrap_or(&[]),
1598 )
1599 }
1600 };
1601
1602 matrix[i * n + j] = distance;
1603 matrix[j * n + i] = distance;
1604 }
1605 }
1606
1607 matrix
1608}
1609
1610#[inline(always)]
1612#[must_use]
1613fn compute_morton_code_2d(x: u32, y: u32) -> u64 {
1614 let mut result = 0u64;
1615 for i in 0..16 {
1616 result |= ((x & (1 << i)) as u64) << (2 * i);
1617 result |= ((y & (1 << i)) as u64) << (2 * i + 1);
1618 }
1619 result
1620}
1621
1622pub fn pdist_adaptive_precision_f64(points: &Array2<f64>, tolerance: f64) -> Vec<f64> {
1629 let n = points.nrows();
1630 let mut matrix = vec![0.0f64; n * n];
1631
1632 if n <= 4 {
1633 return pdist_small_matrix_f64(points);
1634 }
1635
1636 let points_f32: Vec<Vec<f32>> = (0..n)
1638 .map(|i| points.row(i).iter().map(|&x| x as f32).collect())
1639 .collect();
1640
1641 let mut approximate_distances = vec![0.0f32; n * n];
1643 for i in 0..n {
1644 for j in (i + 1)..n {
1645 let dist_f32 = euclidean_distance_f32_fast(&points_f32[i], &points_f32[j]);
1646 approximate_distances[i * n + j] = dist_f32;
1647 approximate_distances[j * n + i] = dist_f32;
1648 }
1649 }
1650
1651 let mut sorted_dists = approximate_distances.clone();
1653 sorted_dists.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1654 let median_dist = sorted_dists[sorted_dists.len() / 2];
1655 let adaptive_threshold = median_dist * tolerance as f32;
1656
1657 for i in 0..n {
1659 for j in (i + 1)..n {
1660 let approx_dist = approximate_distances[i * n + j];
1661
1662 let final_distance = if approx_dist > adaptive_threshold {
1663 unsafe {
1665 euclidean_distance_f64_specialized(
1666 points.row(i).as_slice().unwrap_or(&[]),
1667 points.row(j).as_slice().unwrap_or(&[]),
1668 )
1669 }
1670 } else {
1671 approx_dist as f64
1673 };
1674
1675 matrix[i * n + j] = final_distance;
1676 matrix[j * n + i] = final_distance;
1677 }
1678 }
1679
1680 matrix
1681}
1682
1683#[inline(always)]
1685fn euclidean_distance_f32_fast(a: &[f32], b: &[f32]) -> f32 {
1686 let mut sum = 0.0f32;
1687 let len = a.len().min(b.len());
1688
1689 let chunks = len / 4;
1691 for i in 0..chunks {
1692 let base = i * 4;
1693 let diff0 = a[base] - b[base];
1694 let diff1 = a[base + 1] - b[base + 1];
1695 let diff2 = a[base + 2] - b[base + 2];
1696 let diff3 = a[base + 3] - b[base + 3];
1697
1698 sum = fma_f32(diff0, diff0, sum);
1699 sum = fma_f32(diff1, diff1, sum);
1700 sum = fma_f32(diff2, diff2, sum);
1701 sum = fma_f32(diff3, diff3, sum);
1702 }
1703
1704 for i in (chunks * 4)..len {
1706 let diff = a[i] - b[i];
1707 sum = fma_f32(diff, diff, sum);
1708 }
1709
1710 sum.sqrt()
1711}
1712
1713#[inline(always)]
1721#[must_use]
1722pub fn pdist_memory_aware_f64(points: &Array2<f64>) -> Vec<f64> {
1723 let n = points.nrows();
1724 let mut matrix = vec![0.0f64; n * n];
1725
1726 if n <= 4 {
1727 return pdist_small_matrix_f64(points);
1728 }
1729
1730 const L1_TILE_SIZE: usize = 8; const L2_TILE_SIZE: usize = 32; const L3_TILE_SIZE: usize = 128; for i_l3 in (0..n).step_by(L3_TILE_SIZE) {
1737 let i_l3_end = (i_l3 + L3_TILE_SIZE).min(n);
1738
1739 for j_l3 in (i_l3..n).step_by(L3_TILE_SIZE) {
1740 let j_l3_end = (j_l3 + L3_TILE_SIZE).min(n);
1741
1742 for i_l2 in (i_l3..i_l3_end).step_by(L2_TILE_SIZE) {
1744 let i_l2_end = (i_l2 + L2_TILE_SIZE).min(i_l3_end);
1745
1746 for j_l2 in (j_l3.max(i_l2)..j_l3_end).step_by(L2_TILE_SIZE) {
1747 let j_l2_end = (j_l2 + L2_TILE_SIZE).min(j_l3_end);
1748
1749 for i_l1 in (i_l2..i_l2_end).step_by(L1_TILE_SIZE) {
1751 let i_l1_end = (i_l1 + L1_TILE_SIZE).min(i_l2_end);
1752
1753 for j_l1 in (j_l2.max(i_l1)..j_l2_end).step_by(L1_TILE_SIZE) {
1754 let j_l1_end = (j_l1 + L1_TILE_SIZE).min(j_l2_end);
1755
1756 process_l1_tile(
1758 &points,
1759 &mut matrix,
1760 i_l1,
1761 i_l1_end,
1762 j_l1,
1763 j_l1_end,
1764 n,
1765 );
1766 }
1767 }
1768 }
1769 }
1770 }
1771 }
1772
1773 matrix
1774}
1775
1776#[inline(always)]
1778fn process_l1_tile(
1779 points: &Array2<f64>,
1780 matrix: &mut [f64],
1781 i_start: usize,
1782 i_end: usize,
1783 j_start: usize,
1784 j_end: usize,
1785 n: usize,
1786) {
1787 for i in i_start..i_end {
1788 let row_i = points.row(i);
1789 let i_n = i * n;
1790
1791 if i + 1 < i_end {
1793 let next_row = points.row(i + 1);
1794 streaming_load_hint(next_row.as_slice().unwrap_or(&[]));
1795 }
1796
1797 for j in j_start.max(i)..j_end {
1798 let distance = if i == j {
1799 0.0f64
1800 } else {
1801 let row_j = points.row(j);
1802 unsafe {
1803 euclidean_distance_f64_specialized(
1804 row_i.as_slice().unwrap_or(&[]),
1805 row_j.as_slice().unwrap_or(&[]),
1806 )
1807 }
1808 };
1809
1810 let idx_ij = i_n + j;
1812 let idx_ji = j * n + i;
1813
1814 unsafe {
1815 *matrix.get_unchecked_mut(idx_ij) = distance;
1816 *matrix.get_unchecked_mut(idx_ji) = distance;
1817 }
1818 }
1819 }
1820}
1821
1822pub fn pdist_divide_conquer_f64(points: &Array2<f64>) -> Vec<f64> {
1829 let n = points.nrows();
1830 let mut matrix = vec![0.0f64; n * n];
1831
1832 if n <= 32 {
1833 return pdist_memory_aware_f64(points);
1834 }
1835
1836 divide_conquer_recursive(points, &mut matrix, 0, n, 0, n);
1838
1839 matrix
1840}
1841
1842fn divide_conquer_recursive(
1844 points: &Array2<f64>,
1845 matrix: &mut [f64],
1846 i_start: usize,
1847 i_end: usize,
1848 j_start: usize,
1849 j_end: usize,
1850) {
1851 let i_size = i_end - i_start;
1852 let j_size = j_end - j_start;
1853 let n = points.nrows();
1854
1855 if i_size <= 32 && j_size <= 32 {
1857 process_l1_tile(
1858 points,
1859 matrix,
1860 i_start,
1861 i_end,
1862 j_start.max(i_start),
1863 j_end,
1864 n,
1865 );
1866 return;
1867 }
1868
1869 if i_size >= j_size {
1871 let i_mid = i_start + i_size / 2;
1873
1874 divide_conquer_recursive(points, matrix, i_start, i_mid, j_start, j_end);
1876 divide_conquer_recursive(points, matrix, i_mid, i_end, j_start, j_end);
1877
1878 if j_start < i_mid && i_mid < j_end {
1880 divide_conquer_recursive(points, matrix, i_start, i_mid, i_mid, j_end);
1881 }
1882 } else {
1883 let j_mid = j_start + j_size / 2;
1885
1886 divide_conquer_recursive(points, matrix, i_start, i_end, j_start, j_mid);
1888 divide_conquer_recursive(points, matrix, i_start, i_end, j_mid, j_end);
1889 }
1890}
1891
1892#[inline(always)]
1894pub fn euclidean_distance_2d(a: &[f64; 2], b: &[f64; 2]) -> f64 {
1895 euclidean_distance_fixed(a, b)
1896}
1897
1898#[inline(always)]
1899pub fn euclidean_distance_3d(a: &[f64; 3], b: &[f64; 3]) -> f64 {
1900 euclidean_distance_fixed(a, b)
1901}
1902
1903#[inline(always)]
1904pub fn euclidean_distance_4d(a: &[f64; 4], b: &[f64; 4]) -> f64 {
1905 euclidean_distance_fixed(a, b)
1906}
1907
1908#[cfg(feature = "parallel")]
1914pub fn pdist_concurrent_f64(points: &Array2<f64>) -> Vec<f64> {
1915 use std::sync::atomic::{AtomicU64, Ordering};
1916
1917 let n = points.nrows();
1918 if n <= 4 {
1919 return pdist_small_matrix_f64(points);
1920 }
1921
1922 let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n);
1924 matrix.resize(n * n, 0.0f64);
1925
1926 const CACHE_LINE_SIZE: usize = 64; const WORK_CHUNK_SIZE: usize = 32; let work_items: Vec<Vec<(usize, usize)>> = (0..n)
1932 .collect::<Vec<_>>()
1933 .chunks(WORK_CHUNK_SIZE)
1934 .map(|chunk| {
1935 chunk
1936 .iter()
1937 .flat_map(|&i| ((i + 1)..n).map(move |j| (i, j)))
1938 .collect()
1939 })
1940 .collect();
1941
1942 let work_counter = AtomicU64::new(0);
1944 let total_chunks = work_items.len() as u64;
1945
1946 for chunk in work_items {
1949 for (i, j) in chunk {
1950 let distance = unsafe {
1951 euclidean_distance_f64_specialized(
1952 points.row(i).as_slice().unwrap_or(&[]),
1953 points.row(j).as_slice().unwrap_or(&[]),
1954 )
1955 };
1956
1957 let matrix_ptr = matrix.as_mut_slice().as_mut_ptr();
1959 unsafe {
1960 let idx_ij = i * n + j;
1962 let idx_ji = j * n + i;
1963
1964 std::ptr::write_volatile(matrix_ptr.add(idx_ij), distance);
1966 std::ptr::write_volatile(matrix_ptr.add(idx_ji), distance);
1967 }
1968 }
1969
1970 work_counter.fetch_add(1, Ordering::Relaxed);
1972 }
1973
1974 let matrix_slice = matrix.as_mut_slice();
1976 for i in 0..n {
1977 matrix_slice[i * n + i] = 0.0;
1978 }
1979
1980 matrix.as_slice().to_vec()
1981}
1982
1983#[cfg(feature = "parallel")]
1991pub fn pdist_lockfree_f64(points: &Array2<f64>) -> Vec<f64> {
1992 use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
1993
1994 let n = points.nrows();
1995 if n <= 4 {
1996 return pdist_small_matrix_f64(points);
1997 }
1998
1999 let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n + 64);
2001 matrix.resize(n * n, 0.0f64);
2002
2003 let num_cpus = std::thread::available_parallelism()
2005 .map(|p| p.get())
2006 .unwrap_or(1);
2007 let total_pairs = n * (n - 1) / 2;
2008 let work_per_cpu = (total_pairs + num_cpus - 1) / num_cpus;
2009
2010 let work_queues: Vec<Vec<(usize, usize)>> = (0..num_cpus)
2012 .map(|cpu_id| {
2013 let start_idx = cpu_id * work_per_cpu;
2014 let end_idx = ((cpu_id + 1) * work_per_cpu).min(total_pairs);
2015
2016 let mut local_work = Vec::with_capacity(work_per_cpu);
2018 let mut global_idx = 0;
2019
2020 for i in 0..n {
2021 for j in (i + 1)..n {
2022 if global_idx >= start_idx && global_idx < end_idx {
2023 local_work.push((i, j));
2024 }
2025 global_idx += 1;
2026 if global_idx >= end_idx {
2027 break;
2028 }
2029 }
2030 if global_idx >= end_idx {
2031 break;
2032 }
2033 }
2034
2035 local_work
2036 })
2037 .collect();
2038
2039 let steal_attempts = AtomicU64::new(0);
2041 let completed_work = AtomicUsize::new(0);
2042
2043 for (cpu_id, work_queue) in work_queues.into_iter().enumerate() {
2046 let mut backoff_delay = 1;
2047 const MAX_BACKOFF: u64 = 1024;
2048
2049 for (i, j) in work_queue {
2050 let distance = unsafe {
2052 euclidean_distance_f64_specialized(
2053 points.row(i).as_slice().unwrap_or(&[]),
2054 points.row(j).as_slice().unwrap_or(&[]),
2055 )
2056 };
2057
2058 let matrix_ptr = matrix.as_mut_slice().as_mut_ptr();
2060 unsafe {
2061 let idx_ij = i * n + j;
2062 let idx_ji = j * n + i;
2063
2064 std::ptr::write_volatile(matrix_ptr.add(idx_ij), distance);
2066 std::ptr::write_volatile(matrix_ptr.add(idx_ji), distance);
2067 }
2068
2069 completed_work.fetch_add(1, Ordering::Relaxed);
2071
2072 if steal_attempts.load(Ordering::Relaxed) > (cpu_id as u64 * 100) {
2074 if backoff_delay < MAX_BACKOFF {
2075 std::thread::sleep(std::time::Duration::from_nanos(backoff_delay));
2076 backoff_delay *= 2;
2077 } else {
2078 backoff_delay = 1; }
2080 }
2081 }
2082 }
2083
2084 let matrix_slice = matrix.as_mut_slice();
2086 for i in 0..n {
2087 matrix_slice[i * n + i] = 0.0;
2088 }
2089
2090 matrix.as_slice().to_vec()
2091}
2092
2093#[cfg(feature = "parallel")]
2101pub fn pdist_adaptive_lockfree_f64(points: &Array2<f64>, precision_threshold: f64) -> Vec<f64> {
2102 use std::sync::atomic::{AtomicU64, Ordering};
2103
2104 let n = points.nrows();
2105 if n <= 32 {
2106 return pdist_memory_aware_f64(points);
2107 }
2108
2109 if n < 1000 {
2111 return pdist_lockfree_f64(points);
2112 }
2113
2114 let cache_block_size = if n > 10000 { 256 } else { 128 };
2116 let num_blocks = (n + cache_block_size - 1) / cache_block_size;
2117
2118 let block_pairs: Vec<(usize, usize)> = (0..num_blocks)
2120 .flat_map(|i| (i..num_blocks).map(move |j| (i, j)))
2121 .collect();
2122
2123 let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n + 128);
2125 matrix.resize(n * n, 0.0f64);
2126
2127 for (block_i, block_j) in block_pairs {
2130 let i_start = block_i * cache_block_size;
2131 let i_end = (i_start + cache_block_size).min(n);
2132 let j_start = block_j * cache_block_size;
2133 let j_end = (j_start + cache_block_size).min(n);
2134
2135 for i in i_start..i_end {
2137 for j in j_start.max(i)..j_end {
2138 let distance = if i == j {
2139 0.0f64
2140 } else {
2141 let estimated_distance = {
2143 let row_i = points.row(i);
2144 let row_j = points.row(j);
2145 let dim = row_i.len();
2146
2147 if dim >= 10 && precision_threshold > 0.01 {
2149 euclidean_distance_f32_fast(
2150 &row_i.iter().map(|&x| x as f32).collect::<Vec<_>>(),
2151 &row_j.iter().map(|&x| x as f32).collect::<Vec<_>>(),
2152 ) as f64
2153 } else {
2154 unsafe {
2155 euclidean_distance_f64_specialized(
2156 row_i.as_slice().unwrap_or(&[]),
2157 row_j.as_slice().unwrap_or(&[]),
2158 )
2159 }
2160 }
2161 };
2162 estimated_distance
2163 };
2164
2165 let matrix_ptr = matrix.as_mut_slice().as_mut_ptr();
2167 unsafe {
2168 let idx_ij = i * n + j;
2169 let idx_ji = j * n + i;
2170 std::ptr::write_volatile(matrix_ptr.add(idx_ij), distance);
2171 std::ptr::write_volatile(matrix_ptr.add(idx_ji), distance);
2172 }
2173 }
2174 }
2175 }
2176
2177 matrix.as_slice().to_vec()
2178}
2179
2180#[allow(dead_code)]
2213pub fn cdist<T, F>(x_a: &Array2<T>, xb: &Array2<T>, metric: F) -> SpatialResult<Array2<T>>
2214where
2215 T: Float + std::fmt::Debug,
2216 F: Fn(&[T], &[T]) -> T,
2217{
2218 let n_a = x_a.nrows();
2219 let n_b = xb.nrows();
2220
2221 if x_a.ncols() != xb.ncols() {
2222 return Err(SpatialError::DimensionError(format!(
2223 "Dimension mismatch: _x_a has {} columns, xb has {} columns",
2224 x_a.ncols(),
2225 xb.ncols()
2226 )));
2227 }
2228
2229 let mut result = Array2::zeros((n_a, n_b));
2230
2231 for i in 0..n_a {
2232 let row_i = x_a.row(i).to_vec();
2233
2234 for j in 0..n_b {
2235 let row_j = xb.row(j).to_vec();
2236 result[(i, j)] = metric(&row_i, &row_j);
2237 }
2238 }
2239
2240 Ok(result)
2241}
2242
2243pub fn cdist_optimized<T, F>(x_a: &Array2<T>, xb: &Array2<T>, metric: F) -> SpatialResult<Array2<T>>
2274where
2275 T: Float + std::fmt::Debug,
2276 F: Fn(ArrayView1<T>, ArrayView1<T>) -> T,
2277{
2278 let n_a = x_a.nrows();
2279 let n_b = xb.nrows();
2280
2281 if x_a.ncols() != xb.ncols() {
2282 return Err(SpatialError::DimensionError(format!(
2283 "Dimension mismatch: x_a has {} columns, xb has {} columns",
2284 x_a.ncols(),
2285 xb.ncols()
2286 )));
2287 }
2288
2289 let mut result = Array2::zeros((n_a, n_b));
2290
2291 for i in 0..n_a {
2292 let row_i = x_a.row(i);
2293
2294 for j in 0..n_b {
2295 let row_j = xb.row(j);
2296 result[(i, j)] = metric(row_i, row_j);
2297 }
2298 }
2299
2300 Ok(result)
2301}
2302
2303#[allow(dead_code)]
2313pub fn is_valid_condensed_distance_matrix<T: Float>(distances: &[T]) -> bool {
2314 let n = (1.0 + (1.0 + 8.0 * distances.len() as f64).sqrt()) / 2.0;
2316 if n.fract() != 0.0 {
2317 return false;
2318 }
2319
2320 for &dist in distances {
2322 if dist < T::zero() {
2323 return false;
2324 }
2325 }
2326
2327 true
2328}
2329
2330#[allow(dead_code)]
2344pub fn squareform<T: Float>(distances: &[T]) -> SpatialResult<Array2<T>> {
2345 if !is_valid_condensed_distance_matrix(distances) {
2346 return Err(SpatialError::ValueError(
2347 "Invalid condensed distance matrix".to_string(),
2348 ));
2349 }
2350
2351 let n = (1.0 + (1.0 + 8.0 * distances.len() as f64).sqrt()) / 2.0;
2352 let n = n as usize;
2353
2354 let mut result = Array2::zeros((n, n));
2355
2356 let mut k = 0;
2357 for i in 0..n - 1 {
2358 for j in i + 1..n {
2359 result[(i, j)] = distances[k];
2360 result[(j, i)] = distances[k];
2361 k += 1;
2362 }
2363 }
2364
2365 Ok(result)
2366}
2367
2368#[allow(dead_code)]
2383pub fn squareform_to_condensed<T: Float>(distances: &Array2<T>) -> SpatialResult<Vec<T>> {
2384 let n = distances.nrows();
2385 if n != distances.ncols() {
2386 return Err(SpatialError::ValueError(
2387 "Distance matrix must be square".to_string(),
2388 ));
2389 }
2390
2391 for i in 0..n {
2393 for j in i + 1..n {
2394 if (distances[(i, j)] - distances[(j, i)]).abs() > T::epsilon() {
2395 return Err(SpatialError::ValueError(
2396 "Distance matrix must be symmetric".to_string(),
2397 ));
2398 }
2399 }
2400 }
2401
2402 let size = n * (n - 1) / 2;
2404 let mut result = Vec::with_capacity(size);
2405
2406 for i in 0..n - 1 {
2407 for j in i + 1..n {
2408 result.push(distances[(i, j)]);
2409 }
2410 }
2411
2412 Ok(result)
2413}
2414
2415#[allow(dead_code)]
2442pub fn dice<T: Float>(point1: &[bool], point2: &[bool]) -> T {
2443 if point1.len() != point2.len() {
2444 return T::nan();
2445 }
2446
2447 let mut n_true_true = 0;
2448 let mut n_true_false = 0;
2449 let mut n_false_true = 0;
2450
2451 for i in 0..point1.len() {
2452 if point1[i] && point2[i] {
2453 n_true_true += 1;
2454 } else if point1[i] && !point2[i] {
2455 n_true_false += 1;
2456 } else if !point1[i] && point2[i] {
2457 n_false_true += 1;
2458 }
2459 }
2460
2461 let num = T::from(n_true_false + n_false_true).unwrap();
2462 let denom = T::from(2 * n_true_true + n_true_false + n_false_true).unwrap();
2463
2464 if denom > T::zero() {
2465 num / denom
2466 } else {
2467 T::zero()
2468 }
2469}
2470
2471#[allow(dead_code)]
2498pub fn kulsinski<T: Float>(point1: &[bool], point2: &[bool]) -> T {
2499 if point1.len() != point2.len() {
2500 return T::nan();
2501 }
2502
2503 let mut n_true_true = 0;
2504 let mut n_true_false = 0;
2505 let mut n_false_true = 0;
2506 let n = point1.len();
2507
2508 for i in 0..n {
2509 if point1[i] && point2[i] {
2510 n_true_true += 1;
2511 } else if point1[i] && !point2[i] {
2512 n_true_false += 1;
2513 } else if !point1[i] && point2[i] {
2514 n_false_true += 1;
2515 }
2516 }
2517
2518 let num = T::from(n_true_false + n_false_true - n_true_true + n).unwrap();
2519 let denom = T::from(n_true_false + n_false_true + n).unwrap();
2520
2521 if denom > T::zero() {
2522 num / denom
2523 } else {
2524 T::zero()
2525 }
2526}
2527
2528#[allow(dead_code)]
2555pub fn rogerstanimoto<T: Float>(point1: &[bool], point2: &[bool]) -> T {
2556 if point1.len() != point2.len() {
2557 return T::nan();
2558 }
2559
2560 let mut n_true_true = 0;
2561 let mut n_true_false = 0;
2562 let mut n_false_true = 0;
2563 let mut n_false_false = 0;
2564
2565 for i in 0..point1.len() {
2566 if point1[i] && point2[i] {
2567 n_true_true += 1;
2568 } else if point1[i] && !point2[i] {
2569 n_true_false += 1;
2570 } else if !point1[i] && point2[i] {
2571 n_false_true += 1;
2572 } else {
2573 n_false_false += 1;
2574 }
2575 }
2576
2577 let r = n_true_false + n_false_true;
2578
2579 let num = T::from(2 * r).unwrap();
2580 let denom = T::from(n_true_true + n_false_false + 2 * r).unwrap();
2581
2582 if denom > T::zero() {
2583 num / denom
2584 } else {
2585 T::zero()
2586 }
2587}
2588
2589#[allow(dead_code)]
2616pub fn russellrao<T: Float>(point1: &[bool], point2: &[bool]) -> T {
2617 if point1.len() != point2.len() {
2618 return T::nan();
2619 }
2620
2621 let mut n_true_true = 0;
2622 let n = point1.len();
2623
2624 for i in 0..n {
2625 if point1[i] && point2[i] {
2626 n_true_true += 1;
2627 }
2628 }
2629
2630 let num = T::from(n - n_true_true).unwrap();
2631 let denom = T::from(n).unwrap();
2632
2633 if denom > T::zero() {
2634 num / denom
2635 } else {
2636 T::zero()
2637 }
2638}
2639
2640#[allow(dead_code)]
2667pub fn sokalmichener<T: Float>(point1: &[bool], point2: &[bool]) -> T {
2668 rogerstanimoto(point1, point2)
2670}
2671
2672#[allow(dead_code)]
2699pub fn sokalsneath<T: Float>(point1: &[bool], point2: &[bool]) -> T {
2700 if point1.len() != point2.len() {
2701 return T::nan();
2702 }
2703
2704 let mut n_true_true = 0;
2705 let mut n_true_false = 0;
2706 let mut n_false_true = 0;
2707
2708 for i in 0..point1.len() {
2709 if point1[i] && point2[i] {
2710 n_true_true += 1;
2711 } else if point1[i] && !point2[i] {
2712 n_true_false += 1;
2713 } else if !point1[i] && point2[i] {
2714 n_false_true += 1;
2715 }
2716 }
2717
2718 let r = n_true_false + n_false_true;
2719
2720 let num = T::from(2 * r).unwrap();
2721 let denom = T::from(n_true_true + 2 * r).unwrap();
2722
2723 if denom > T::zero() {
2724 num / denom
2725 } else {
2726 T::zero()
2727 }
2728}
2729
2730#[allow(dead_code)]
2757pub fn yule<T: Float>(point1: &[bool], point2: &[bool]) -> T {
2758 if point1.len() != point2.len() {
2759 return T::nan();
2760 }
2761
2762 let mut n_true_true = 0;
2763 let mut n_true_false = 0;
2764 let mut n_false_true = 0;
2765 let mut n_false_false = 0;
2766
2767 for i in 0..point1.len() {
2768 if point1[i] && point2[i] {
2769 n_true_true += 1;
2770 } else if point1[i] && !point2[i] {
2771 n_true_false += 1;
2772 } else if !point1[i] && point2[i] {
2773 n_false_true += 1;
2774 } else {
2775 n_false_false += 1;
2776 }
2777 }
2778
2779 let num = T::from(2 * n_true_false * n_false_true).unwrap();
2780 let denom = T::from(n_true_true * n_false_false + n_true_false * n_false_true).unwrap();
2781
2782 if denom > T::zero() {
2783 num / denom
2784 } else {
2785 T::zero()
2786 }
2787}
2788
2789#[cfg(test)]
2790mod tests {
2791 use super::*;
2792 use approx::assert_relative_eq;
2793 use scirs2_core::ndarray::arr2;
2794
2795 #[test]
2796 fn test_euclidean_distance() {
2797 let point1 = &[1.0, 2.0, 3.0];
2798 let point2 = &[4.0, 5.0, 6.0];
2799
2800 assert_relative_eq!(euclidean(point1, point2), 5.196152422706632, epsilon = 1e-6);
2801 }
2802
2803 #[test]
2804 fn test_manhattan_distance() {
2805 let point1 = &[1.0, 2.0, 3.0];
2806 let point2 = &[4.0, 5.0, 6.0];
2807
2808 assert_relative_eq!(manhattan(point1, point2), 9.0, epsilon = 1e-6);
2809 }
2810
2811 #[test]
2812 fn test_chebyshev_distance() {
2813 let point1 = &[1.0, 2.0, 3.0];
2814 let point2 = &[4.0, 5.0, 6.0];
2815
2816 assert_relative_eq!(chebyshev(point1, point2), 3.0, epsilon = 1e-6);
2817 }
2818
2819 #[test]
2820 fn test_minkowski_distance() {
2821 let point1 = &[1.0, 2.0, 3.0];
2822 let point2 = &[4.0, 5.0, 6.0];
2823
2824 assert_relative_eq!(minkowski(point1, point2, 1.0), 9.0, epsilon = 1e-6);
2826
2827 assert_relative_eq!(
2829 minkowski(point1, point2, 2.0),
2830 5.196152422706632,
2831 epsilon = 1e-6
2832 );
2833
2834 assert_relative_eq!(
2836 minkowski(point1, point2, f64::INFINITY),
2837 3.0,
2838 epsilon = 1e-6
2839 );
2840
2841 assert_relative_eq!(minkowski(point1, point2, 3.0), 4.3267, epsilon = 1e-4);
2843 }
2844
2845 #[test]
2846 fn test_canberra_distance() {
2847 let point1 = &[1.0, 2.0, 3.0];
2848 let point2 = &[4.0, 5.0, 6.0];
2849
2850 assert_relative_eq!(canberra(point1, point2), 1.5, epsilon = 1e-6);
2851 }
2852
2853 #[test]
2854 fn test_cosine_distance() {
2855 let point1 = &[1.0, 0.0];
2857 let point2 = &[0.0, 1.0];
2858
2859 assert_relative_eq!(cosine(point1, point2), 1.0, epsilon = 1e-6);
2860
2861 let point3 = &[1.0, 2.0];
2863 let point4 = &[2.0, 4.0];
2864
2865 assert_relative_eq!(cosine(point3, point4), 0.0, epsilon = 1e-6);
2866
2867 let point5 = &[1.0, 1.0];
2869 let point6 = &[1.0, 0.0];
2870
2871 assert_relative_eq!(cosine(point5, point6), 0.2928932188134525, epsilon = 1e-6);
2872 }
2873
2874 #[test]
2875 fn test_correlation_distance() {
2876 let point1 = &[1.0, 2.0, 3.0];
2878 let point2 = &[3.0, 2.0, 1.0];
2879
2880 assert_relative_eq!(correlation(point1, point2), 2.0, epsilon = 1e-6);
2881
2882 let point3 = &[1.0, 2.0, 3.0];
2884 let point4 = &[2.0, 4.0, 6.0];
2885
2886 assert_relative_eq!(correlation(point3, point4), 0.0, epsilon = 1e-6);
2887 }
2888
2889 #[test]
2890 fn test_jaccard_distance() {
2891 let point1 = &[1.0, 0.0, 1.0];
2892 let point2 = &[0.0, 1.0, 1.0];
2893
2894 assert_relative_eq!(jaccard(point1, point2), 2.0 / 3.0, epsilon = 1e-6);
2896
2897 let point3 = &[0.0, 0.0, 0.0];
2899 let point4 = &[0.0, 0.0, 0.0];
2900
2901 assert_relative_eq!(jaccard(point3, point4), 0.0, epsilon = 1e-6);
2902
2903 let point5 = &[1.0, 1.0, 0.0];
2905 let point6 = &[0.0, 0.0, 1.0];
2906
2907 assert_relative_eq!(jaccard(point5, point6), 1.0, epsilon = 1e-6);
2908 }
2909
2910 #[test]
2911 fn test_pdist() {
2912 let points = arr2(&[[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]]);
2913
2914 let dist_matrix = pdist(&points, euclidean);
2915
2916 assert_eq!(dist_matrix.shape(), &[3, 3]);
2917
2918 assert_relative_eq!(dist_matrix[(0, 0)], 0.0, epsilon = 1e-6);
2920 assert_relative_eq!(dist_matrix[(1, 1)], 0.0, epsilon = 1e-6);
2921 assert_relative_eq!(dist_matrix[(2, 2)], 0.0, epsilon = 1e-6);
2922
2923 assert_relative_eq!(dist_matrix[(0, 1)], 1.0, epsilon = 1e-6);
2925 assert_relative_eq!(dist_matrix[(0, 2)], 1.0, epsilon = 1e-6);
2926 assert_relative_eq!(
2927 dist_matrix[(1, 2)],
2928 std::f64::consts::SQRT_2,
2929 epsilon = 1e-6
2930 );
2931
2932 assert_relative_eq!(dist_matrix[(1, 0)], dist_matrix[(0, 1)], epsilon = 1e-6);
2934 assert_relative_eq!(dist_matrix[(2, 0)], dist_matrix[(0, 2)], epsilon = 1e-6);
2935 assert_relative_eq!(dist_matrix[(2, 1)], dist_matrix[(1, 2)], epsilon = 1e-6);
2936 }
2937
2938 #[test]
2939 fn test_cdist() {
2940 let x_a = arr2(&[[0.0, 0.0], [1.0, 0.0]]);
2941
2942 let xb = arr2(&[[0.0, 1.0], [1.0, 1.0]]);
2943
2944 let dist_matrix = cdist(&x_a, &xb, euclidean).unwrap();
2945
2946 assert_eq!(dist_matrix.shape(), &[2, 2]);
2947
2948 assert_relative_eq!(dist_matrix[(0, 0)], 1.0, epsilon = 1e-6);
2949 assert_relative_eq!(
2950 dist_matrix[(0, 1)],
2951 std::f64::consts::SQRT_2,
2952 epsilon = 1e-6
2953 );
2954 assert_relative_eq!(
2955 dist_matrix[(1, 0)],
2956 std::f64::consts::SQRT_2,
2957 epsilon = 1e-6
2958 );
2959 assert_relative_eq!(dist_matrix[(1, 1)], 1.0, epsilon = 1e-6);
2960 }
2961
2962 #[test]
2963 fn test_braycurtis_distance() {
2964 let point1 = &[1.0, 2.0, 3.0];
2965 let point2 = &[2.0, 3.0, 4.0];
2966
2967 let dist = braycurtis(point1, point2);
2968 assert_relative_eq!(dist, 0.2, epsilon = 1e-6);
2972
2973 let point3 = &[1.0, 2.0, 3.0];
2975 let point4 = &[1.0, 2.0, 3.0];
2976 assert_relative_eq!(braycurtis(point3, point4), 0.0, epsilon = 1e-6);
2977
2978 let point5 = &[0.0, 0.0];
2980 let point6 = &[0.0, 0.0];
2981 assert_relative_eq!(braycurtis(point5, point6), 0.0, epsilon = 1e-6);
2982 }
2983
2984 #[test]
2985 fn test_squareform() {
2986 let condensed = vec![1.0, 2.0, 3.0];
2988 let square = squareform(&condensed).unwrap();
2989
2990 assert_eq!(square.shape(), &[3, 3]);
2991 assert_relative_eq!(square[(0, 1)], 1.0, epsilon = 1e-6);
2992 assert_relative_eq!(square[(0, 2)], 2.0, epsilon = 1e-6);
2993 assert_relative_eq!(square[(1, 2)], 3.0, epsilon = 1e-6);
2994
2995 let condensed2 = squareform_to_condensed(&square).unwrap();
2997 assert_eq!(condensed2, condensed);
2998 }
2999}