1use scirs2_core::ndarray::{Array2, ArrayView1};
6use scirs2_core::numeric::Float;
7use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
8
9use super::functions::Distance;
10use super::functions::{fma_f32, fma_f64, prefetch_read, streaming_load_hint};
11use super::types::CacheAlignedBuffer;
12
13#[allow(dead_code)]
14pub fn jaccard<T: Float>(point1: &[T], point2: &[T]) -> T {
15 if point1.len() != point2.len() {
16 return T::nan();
17 }
18 let mut n_true_true = T::zero();
19 let mut n_false_true = T::zero();
20 let mut n_true_false = T::zero();
21 for i in 0..point1.len() {
22 let is_p1_true = point1[i] > T::zero();
23 let is_p2_true = point2[i] > T::zero();
24 if is_p1_true && is_p2_true {
25 n_true_true = n_true_true + T::one();
26 } else if !is_p1_true && is_p2_true {
27 n_false_true = n_false_true + T::one();
28 } else if is_p1_true && !is_p2_true {
29 n_true_false = n_true_false + T::one();
30 }
31 }
32 if n_true_true + n_false_true + n_true_false == T::zero() {
33 T::zero()
34 } else {
35 (n_false_true + n_true_false) / (n_true_true + n_false_true + n_true_false)
36 }
37}
38#[allow(dead_code)]
65pub fn pdist<T, F>(x: &Array2<T>, metric: F) -> Array2<T>
66where
67 T: Float + std::fmt::Debug,
68 F: Fn(&[T], &[T]) -> T,
69{
70 let n = x.nrows();
71 let mut result = Array2::zeros((n, n));
72 for i in 0..n {
73 result[(i, i)] = T::zero();
74 let row_i = x.row(i).to_vec();
75 for j in (i + 1)..n {
76 let row_j = x.row(j).to_vec();
77 let dist = metric(&row_i, &row_j);
78 result[(i, j)] = dist;
79 result[(j, i)] = dist;
80 }
81 }
82 result
83}
84pub fn pdist_optimized<T, F>(x: &Array2<T>, metric: F) -> Array2<T>
111where
112 T: Float + std::fmt::Debug,
113 F: Fn(ArrayView1<T>, ArrayView1<T>) -> T,
114{
115 let n = x.nrows();
116 let mut result = Array2::zeros((n, n));
117 for i in 0..n {
118 result[(i, i)] = T::zero();
119 let row_i = x.row(i);
120 for j in (i + 1)..n {
121 let row_j = x.row(j);
122 let dist = metric(row_i, row_j);
123 result[(i, j)] = dist;
124 result[(j, i)] = dist;
125 }
126 }
127 result
128}
129pub fn euclidean_view<T>(a: ArrayView1<T>, b: ArrayView1<T>) -> T
134where
135 T: Float + std::fmt::Debug,
136{
137 a.iter()
138 .zip(b.iter())
139 .map(|(&ai, &bi)| (ai - bi) * (ai - bi))
140 .fold(T::zero(), |acc, x| acc + x)
141 .sqrt()
142}
143pub fn euclidean_view_simd_f64(a: ArrayView1<f64>, b: ArrayView1<f64>) -> f64 {
148 use scirs2_core::simd_ops::SimdUnifiedOps;
149 let diff = f64::simd_sub(&a, &b);
150 let squared = f64::simd_mul(&diff.view(), &diff.view());
151 let sum = f64::simd_sum(&squared.view());
152 sum.sqrt()
153}
154#[must_use]
159#[cfg_attr(target_arch = "x86_64", target_feature(enable = "fma,avx,avx2"))]
160#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
161pub unsafe fn euclidean_distance_f64_specialized(a: &[f64], b: &[f64]) -> f64 {
162 debug_assert_eq!(a.len(), b.len());
163 let len = a.len();
164 let mut sum = 0.0f64;
165 let chunks = len / 8;
166 #[allow(clippy::needless_range_loop)]
167 for i in 0..chunks {
168 let base = i * 8;
169 if base + 16 < len {
170 prefetch_read(&a[base + 8..base + 16]);
171 prefetch_read(&b[base + 8..base + 16]);
172 }
173 let d0 = a[base] - b[base];
174 let d1 = a[base + 1] - b[base + 1];
175 let d2 = a[base + 2] - b[base + 2];
176 let d3 = a[base + 3] - b[base + 3];
177 let d4 = a[base + 4] - b[base + 4];
178 let d5 = a[base + 5] - b[base + 5];
179 let d6 = a[base + 6] - b[base + 6];
180 let d7 = a[base + 7] - b[base + 7];
181 sum = fma_f64(d0, d0, sum);
182 sum = fma_f64(d1, d1, sum);
183 sum = fma_f64(d2, d2, sum);
184 sum = fma_f64(d3, d3, sum);
185 sum = fma_f64(d4, d4, sum);
186 sum = fma_f64(d5, d5, sum);
187 sum = fma_f64(d6, d6, sum);
188 sum = fma_f64(d7, d7, sum);
189 }
190 for i in (chunks * 8)..len {
191 let diff = a[i] - b[i];
192 sum = fma_f64(diff, diff, sum);
193 }
194 sum.sqrt()
195}
196#[inline(always)]
201#[must_use]
202pub fn euclidean_distance_f32_specialized(a: &[f32], b: &[f32]) -> f32 {
203 debug_assert_eq!(a.len(), b.len());
204 let len = a.len();
205 let mut sum = 0.0f32;
206 let chunks = len / 16;
207 #[allow(clippy::needless_range_loop)]
208 for i in 0..chunks {
209 let base = i * 16;
210 if base + 32 < len {
211 prefetch_read(&a[base + 16..base + 32]);
212 prefetch_read(&b[base + 16..base + 32]);
213 }
214 let mut chunk_sum = 0.0f32;
215 for j in 0..16 {
216 let diff = a[base + j] - b[base + j];
217 chunk_sum = fma_f32(diff, diff, chunk_sum);
218 }
219 sum += chunk_sum;
220 }
221 for i in (chunks * 16)..len {
222 let diff = a[i] - b[i];
223 sum = fma_f32(diff, diff, sum);
224 }
225 sum.sqrt()
226}
227#[inline]
232#[target_feature(enable = "avx2")]
233#[cfg(target_arch = "x86_64")]
234unsafe fn pdist_simd_flat_f64_avx2(points: &Array2<f64>) -> Vec<f64> {
235 pdist_simd_flat_f64_impl(points)
236}
237#[inline]
239fn pdist_simd_flat_f64_fallback(points: &Array2<f64>) -> Vec<f64> {
240 pdist_simd_flat_f64_impl(points)
241}
242#[inline(always)]
244fn pdist_simd_flat_f64_impl(points: &Array2<f64>) -> Vec<f64> {
245 let n = points.nrows();
246 let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n);
247 matrix.resize(n * n, 0.0f64);
248 pdist_simd_flat_f64_core(points, matrix.as_mut_slice())
249}
250#[inline]
252#[target_feature(enable = "neon")]
253#[cfg(target_arch = "aarch64")]
254unsafe fn pdist_simd_flat_f64_neon(points: &Array2<f64>) -> Vec<f64> {
255 pdist_simd_flat_f64_impl(points)
256}
257#[inline]
262pub(super) fn pdist_small_matrix_f64(points: &Array2<f64>) -> Vec<f64> {
263 let n = points.nrows();
264 let mut matrix = vec![0.0f64; n * n];
265 match n {
266 1 => {
267 matrix[0] = 0.0;
268 }
269 2 => {
270 matrix[0] = 0.0;
271 matrix[3] = 0.0;
272 let dist = unsafe {
273 euclidean_distance_f64_specialized(
274 points.row(0).as_slice().unwrap_or(&[]),
275 points.row(1).as_slice().unwrap_or(&[]),
276 )
277 };
278 matrix[1] = dist;
279 matrix[2] = dist;
280 }
281 3 => {
282 matrix[0] = 0.0;
283 matrix[4] = 0.0;
284 matrix[8] = 0.0;
285 let dist_01 = unsafe {
286 euclidean_distance_f64_specialized(
287 points.row(0).as_slice().unwrap_or(&[]),
288 points.row(1).as_slice().unwrap_or(&[]),
289 )
290 };
291 let dist_02 = unsafe {
292 euclidean_distance_f64_specialized(
293 points.row(0).as_slice().unwrap_or(&[]),
294 points.row(2).as_slice().unwrap_or(&[]),
295 )
296 };
297 let dist_12 = unsafe {
298 euclidean_distance_f64_specialized(
299 points.row(1).as_slice().unwrap_or(&[]),
300 points.row(2).as_slice().unwrap_or(&[]),
301 )
302 };
303 matrix[1] = dist_01;
304 matrix[3] = dist_01;
305 matrix[2] = dist_02;
306 matrix[6] = dist_02;
307 matrix[5] = dist_12;
308 matrix[7] = dist_12;
309 }
310 4 => {
311 for i in 0..4 {
312 matrix[i * 4 + i] = 0.0;
313 }
314 for i in 0..3 {
315 for j in (i + 1)..4 {
316 let dist = unsafe {
317 euclidean_distance_f64_specialized(
318 points.row(i).as_slice().unwrap_or(&[]),
319 points.row(j).as_slice().unwrap_or(&[]),
320 )
321 };
322 matrix[i * 4 + j] = dist;
323 matrix[j * 4 + i] = dist;
324 }
325 }
326 }
327 _ => {
328 return pdist_simd_flat_f64_impl(points);
329 }
330 }
331 matrix
332}
333pub fn pdist_simd_flat_f64(points: &Array2<f64>) -> Vec<f64> {
335 let n = points.nrows();
336 if n <= 4 {
337 return pdist_small_matrix_f64(points);
338 }
339 #[cfg(target_arch = "x86_64")]
340 {
341 let capabilities = PlatformCapabilities::detect();
342 if capabilities.avx2_available {
343 unsafe { pdist_simd_flat_f64_avx2(points) }
344 } else {
345 pdist_simd_flat_f64_fallback(points)
346 }
347 }
348 #[cfg(target_arch = "aarch64")]
349 {
350 let capabilities = PlatformCapabilities::detect();
351 if capabilities.neon_available {
352 unsafe { pdist_simd_flat_f64_neon(points) }
353 } else {
354 pdist_simd_flat_f64_fallback(points)
355 }
356 }
357 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
358 {
359 pdist_simd_flat_f64_fallback(points)
360 }
361}
362#[inline(always)]
364fn pdist_simd_flat_f64_core(points: &Array2<f64>, matrix: &mut [f64]) -> Vec<f64> {
365 let n = points.nrows();
366 const CACHE_BLOCK_SIZE: usize = 64;
367 for i_block in (0..n).step_by(CACHE_BLOCK_SIZE) {
368 let i_end = (i_block + CACHE_BLOCK_SIZE).min(n);
369 for j_block in (i_block..n).step_by(CACHE_BLOCK_SIZE) {
370 let j_end = (j_block + CACHE_BLOCK_SIZE).min(n);
371 for i in i_block..i_end {
372 if i + 1 < i_end {
373 let next_row = points.row(i + 1);
374 let next_slice = next_row.as_slice().unwrap_or(&[]);
375 streaming_load_hint(next_slice);
376 prefetch_read(next_slice);
377 }
378 if i + 2 < i_end {
379 let future_base = (i + 2) * n;
380 if future_base + n <= matrix.len() {
381 let write_region = &matrix[future_base..future_base + n.min(64)];
382 streaming_load_hint(write_region);
383 prefetch_read(write_region);
384 }
385 }
386 let current_row = points.row(i);
387 let i_n = i * n;
388 for j in j_block.max(i)..j_end {
389 let distance = if i == j {
390 0.0f64
391 } else {
392 let row_j = points.row(j);
393 unsafe {
394 euclidean_distance_f64_specialized(
395 current_row.as_slice().unwrap_or(&[]),
396 row_j.as_slice().unwrap_or(&[]),
397 )
398 }
399 };
400 let flat_idx_ij = i_n + j;
401 let flat_idx_ji = j * n + i;
402 unsafe {
403 *matrix.get_unchecked_mut(flat_idx_ij) = distance;
404 *matrix.get_unchecked_mut(flat_idx_ji) = distance;
405 }
406 }
407 }
408 }
409 }
410 matrix.to_vec()
411}
412#[inline(always)]
417pub fn euclidean_distance_fixed<const N: usize>(a: &[f64; N], b: &[f64; N]) -> f64 {
418 let mut sum = 0.0f64;
419 match N {
420 1 => {
421 let diff = a[0] - b[0];
422 sum = diff * diff;
423 }
424 2 => {
425 let diff0 = a[0] - b[0];
426 let diff1 = a[1] - b[1];
427 sum = diff0 * diff0 + diff1 * diff1;
428 }
429 3 => {
430 let diff0 = a[0] - b[0];
431 let diff1 = a[1] - b[1];
432 let diff2 = a[2] - b[2];
433 sum = diff0 * diff0 + diff1 * diff1 + diff2 * diff2;
434 }
435 4 => {
436 let diff0 = a[0] - b[0];
437 let diff1 = a[1] - b[1];
438 let diff2 = a[2] - b[2];
439 let diff3 = a[3] - b[3];
440 sum = diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
441 }
442 _ => {
443 for i in 0..N {
444 let diff = a[i] - b[i];
445 sum = fma_f64(diff, diff, sum);
446 }
447 }
448 }
449 sum.sqrt()
450}
451#[inline(always)]
458#[must_use]
459pub fn pdist_hierarchical_f64(points: &Array2<f64>) -> Vec<f64> {
460 let n = points.nrows();
461 let mut matrix = vec![0.0f64; n * n];
462 if n <= 4 {
463 return pdist_small_matrix_f64(points);
464 }
465 let mut morton_indices: Vec<(u64, usize)> = Vec::with_capacity(n);
466 for i in 0..n {
467 let row = points.row(i);
468 let morton_code =
469 compute_morton_code_2d((row[0] * 1024.0) as u32, (row[1] * 1024.0) as u32);
470 morton_indices.push((morton_code, i));
471 }
472 morton_indices.sort_unstable_by_key(|&(code, _)| code);
473 let sorted_indices: Vec<usize> = morton_indices.iter().map(|(_, idx)| *idx).collect();
474 for (i_morton, &i) in sorted_indices.iter().enumerate() {
475 for (j_morton, &j) in sorted_indices.iter().enumerate().skip(i_morton) {
476 let distance = if i == j {
477 0.0f64
478 } else {
479 unsafe {
480 euclidean_distance_f64_specialized(
481 points.row(i).as_slice().unwrap_or(&[]),
482 points.row(j).as_slice().unwrap_or(&[]),
483 )
484 }
485 };
486 matrix[i * n + j] = distance;
487 matrix[j * n + i] = distance;
488 }
489 }
490 matrix
491}
492#[inline(always)]
494#[must_use]
495fn compute_morton_code_2d(x: u32, y: u32) -> u64 {
496 let mut result = 0u64;
497 for i in 0..16 {
498 result |= ((x & (1 << i)) as u64) << (2 * i);
499 result |= ((y & (1 << i)) as u64) << (2 * i + 1);
500 }
501 result
502}