scirs2_spatial/distance/
functions_2.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use scirs2_core::ndarray::{Array2, ArrayView1};
6use scirs2_core::numeric::Float;
7use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
8
9use super::functions::Distance;
10use super::functions::{fma_f32, fma_f64, prefetch_read, streaming_load_hint};
11use super::types::CacheAlignedBuffer;
12
13#[allow(dead_code)]
14pub fn jaccard<T: Float>(point1: &[T], point2: &[T]) -> T {
15    if point1.len() != point2.len() {
16        return T::nan();
17    }
18    let mut n_true_true = T::zero();
19    let mut n_false_true = T::zero();
20    let mut n_true_false = T::zero();
21    for i in 0..point1.len() {
22        let is_p1_true = point1[i] > T::zero();
23        let is_p2_true = point2[i] > T::zero();
24        if is_p1_true && is_p2_true {
25            n_true_true = n_true_true + T::one();
26        } else if !is_p1_true && is_p2_true {
27            n_false_true = n_false_true + T::one();
28        } else if is_p1_true && !is_p2_true {
29            n_true_false = n_true_false + T::one();
30        }
31    }
32    if n_true_true + n_false_true + n_true_false == T::zero() {
33        T::zero()
34    } else {
35        (n_false_true + n_true_false) / (n_true_true + n_false_true + n_true_false)
36    }
37}
38/// Compute a distance matrix between two sets of points
39///
40/// # Arguments
41///
42/// * `x_a` - First set of points
43/// * `xb` - Second set of points
44/// * `metric` - Distance metric to use
45///
46/// # Returns
47///
48/// * Distance matrix with shape (x_a.nrows(), xb.nrows())
49///
50/// # Examples
51///
52/// ```
53/// use scirs2_spatial::distance::{pdist, euclidean};
54/// use scirs2_core::ndarray::array;
55///
56/// let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]];
57/// let dist_matrix = pdist(&points, euclidean);
58///
59/// assert_eq!(dist_matrix.shape(), &[3, 3]);
60/// assert!((dist_matrix[(0, 1)] - 1.0f64).abs() < 1e-6);
61/// assert!((dist_matrix[(0, 2)] - 1.0f64).abs() < 1e-6);
62/// assert!((dist_matrix[(1, 2)] - std::f64::consts::SQRT_2).abs() < 1e-6);
63/// ```
64#[allow(dead_code)]
65pub fn pdist<T, F>(x: &Array2<T>, metric: F) -> Array2<T>
66where
67    T: Float + std::fmt::Debug,
68    F: Fn(&[T], &[T]) -> T,
69{
70    let n = x.nrows();
71    let mut result = Array2::zeros((n, n));
72    for i in 0..n {
73        result[(i, i)] = T::zero();
74        let row_i = x.row(i).to_vec();
75        for j in (i + 1)..n {
76            let row_j = x.row(j).to_vec();
77            let dist = metric(&row_i, &row_j);
78            result[(i, j)] = dist;
79            result[(j, i)] = dist;
80        }
81    }
82    result
83}
84/// Compute a distance matrix between points (optimized zero-allocation version)
85///
86/// This function avoids memory allocations by working directly with array views,
87/// providing significant performance improvements over the standard pdist function.
88///
89/// # Arguments
90///
91/// * `x` - Input matrix where each row is a point
92/// * `metric` - Distance metric function that operates on ArrayView1
93///
94/// # Returns
95///
96/// * Symmetric distance matrix with shape (n, n)
97///
98/// # Examples
99///
100/// ```
101/// use scirs2_spatial::distance::{pdist_optimized, euclidean_view};
102/// use scirs2_core::ndarray::array;
103///
104/// let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]];
105/// let dist_matrix = pdist_optimized(&points, euclidean_view);
106///
107/// assert_eq!(dist_matrix.shape(), &[3, 3]);
108/// assert!((dist_matrix[(0, 1)] - 1.0f64).abs() < 1e-6);
109/// ```
110pub fn pdist_optimized<T, F>(x: &Array2<T>, metric: F) -> Array2<T>
111where
112    T: Float + std::fmt::Debug,
113    F: Fn(ArrayView1<T>, ArrayView1<T>) -> T,
114{
115    let n = x.nrows();
116    let mut result = Array2::zeros((n, n));
117    for i in 0..n {
118        result[(i, i)] = T::zero();
119        let row_i = x.row(i);
120        for j in (i + 1)..n {
121            let row_j = x.row(j);
122            let dist = metric(row_i, row_j);
123            result[(i, j)] = dist;
124            result[(j, i)] = dist;
125        }
126    }
127    result
128}
129/// Euclidean distance function that operates on ArrayView1 (zero-allocation)
130///
131/// This is an optimized version of euclidean distance that works directly
132/// with array views without requiring vector conversions.
133pub fn euclidean_view<T>(a: ArrayView1<T>, b: ArrayView1<T>) -> T
134where
135    T: Float + std::fmt::Debug,
136{
137    a.iter()
138        .zip(b.iter())
139        .map(|(&ai, &bi)| (ai - bi) * (ai - bi))
140        .fold(T::zero(), |acc, x| acc + x)
141        .sqrt()
142}
143/// SIMD-optimized euclidean distance for f64 using scirs2_core operations
144///
145/// This function leverages SIMD acceleration when working with f64 arrays
146/// for maximum performance in distance computations.
147pub fn euclidean_view_simd_f64(a: ArrayView1<f64>, b: ArrayView1<f64>) -> f64 {
148    use scirs2_core::simd_ops::SimdUnifiedOps;
149    let diff = f64::simd_sub(&a, &b);
150    let squared = f64::simd_mul(&diff.view(), &diff.view());
151    let sum = f64::simd_sum(&squared.view());
152    sum.sqrt()
153}
154/// Ultra-optimized f64 Euclidean distance with comprehensive compiler optimizations
155///
156/// This function uses all available CPU features and compiler optimizations
157/// to make aggressive optimizations and use the best instruction sequences.
158#[must_use]
159#[cfg_attr(target_arch = "x86_64", target_feature(enable = "fma,avx,avx2"))]
160#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
161pub unsafe fn euclidean_distance_f64_specialized(a: &[f64], b: &[f64]) -> f64 {
162    debug_assert_eq!(a.len(), b.len());
163    let len = a.len();
164    let mut sum = 0.0f64;
165    let chunks = len / 8;
166    #[allow(clippy::needless_range_loop)]
167    for i in 0..chunks {
168        let base = i * 8;
169        if base + 16 < len {
170            prefetch_read(&a[base + 8..base + 16]);
171            prefetch_read(&b[base + 8..base + 16]);
172        }
173        let d0 = a[base] - b[base];
174        let d1 = a[base + 1] - b[base + 1];
175        let d2 = a[base + 2] - b[base + 2];
176        let d3 = a[base + 3] - b[base + 3];
177        let d4 = a[base + 4] - b[base + 4];
178        let d5 = a[base + 5] - b[base + 5];
179        let d6 = a[base + 6] - b[base + 6];
180        let d7 = a[base + 7] - b[base + 7];
181        sum = fma_f64(d0, d0, sum);
182        sum = fma_f64(d1, d1, sum);
183        sum = fma_f64(d2, d2, sum);
184        sum = fma_f64(d3, d3, sum);
185        sum = fma_f64(d4, d4, sum);
186        sum = fma_f64(d5, d5, sum);
187        sum = fma_f64(d6, d6, sum);
188        sum = fma_f64(d7, d7, sum);
189    }
190    for i in (chunks * 8)..len {
191        let diff = a[i] - b[i];
192        sum = fma_f64(diff, diff, sum);
193    }
194    sum.sqrt()
195}
196/// Ultra-optimized f32 Euclidean distance with comprehensive compiler optimizations
197///
198/// This function is hyper-optimized for f32 specifically, taking advantage
199/// of wider SIMD registers and different instruction costs with maximum compiler optimizations.
200#[inline(always)]
201#[must_use]
202pub fn euclidean_distance_f32_specialized(a: &[f32], b: &[f32]) -> f32 {
203    debug_assert_eq!(a.len(), b.len());
204    let len = a.len();
205    let mut sum = 0.0f32;
206    let chunks = len / 16;
207    #[allow(clippy::needless_range_loop)]
208    for i in 0..chunks {
209        let base = i * 16;
210        if base + 32 < len {
211            prefetch_read(&a[base + 16..base + 32]);
212            prefetch_read(&b[base + 16..base + 32]);
213        }
214        let mut chunk_sum = 0.0f32;
215        for j in 0..16 {
216            let diff = a[base + j] - b[base + j];
217            chunk_sum = fma_f32(diff, diff, chunk_sum);
218        }
219        sum += chunk_sum;
220    }
221    for i in (chunks * 16)..len {
222        let diff = a[i] - b[i];
223        sum = fma_f32(diff, diff, sum);
224    }
225    sum.sqrt()
226}
227/// Ultra-high performance distance matrix with advanced cache optimization
228///
229/// This implements cache-blocking, memory prefetching, and SIMD acceleration
230/// for maximum performance on large datasets.
231#[inline]
232#[target_feature(enable = "avx2")]
233#[cfg(target_arch = "x86_64")]
234unsafe fn pdist_simd_flat_f64_avx2(points: &Array2<f64>) -> Vec<f64> {
235    pdist_simd_flat_f64_impl(points)
236}
237/// Fallback implementation for non-AVX2 targets
238#[inline]
239fn pdist_simd_flat_f64_fallback(points: &Array2<f64>) -> Vec<f64> {
240    pdist_simd_flat_f64_impl(points)
241}
242/// Core implementation shared between optimized and fallback versions
243#[inline(always)]
244fn pdist_simd_flat_f64_impl(points: &Array2<f64>) -> Vec<f64> {
245    let n = points.nrows();
246    let mut matrix = CacheAlignedBuffer::new_with_capacity(n * n);
247    matrix.resize(n * n, 0.0f64);
248    pdist_simd_flat_f64_core(points, matrix.as_mut_slice())
249}
250/// ARM NEON optimized implementation
251#[inline]
252#[target_feature(enable = "neon")]
253#[cfg(target_arch = "aarch64")]
254unsafe fn pdist_simd_flat_f64_neon(points: &Array2<f64>) -> Vec<f64> {
255    pdist_simd_flat_f64_impl(points)
256}
257/// Optimized small matrix distance computation for tiny datasets
258///
259/// Uses completely unrolled loops and inline computations for maximum performance
260/// on small matrices where the overhead of general algorithms is significant.
261#[inline]
262pub(super) fn pdist_small_matrix_f64(points: &Array2<f64>) -> Vec<f64> {
263    let n = points.nrows();
264    let mut matrix = vec![0.0f64; n * n];
265    match n {
266        1 => {
267            matrix[0] = 0.0;
268        }
269        2 => {
270            matrix[0] = 0.0;
271            matrix[3] = 0.0;
272            let dist = unsafe {
273                euclidean_distance_f64_specialized(
274                    points.row(0).as_slice().unwrap_or(&[]),
275                    points.row(1).as_slice().unwrap_or(&[]),
276                )
277            };
278            matrix[1] = dist;
279            matrix[2] = dist;
280        }
281        3 => {
282            matrix[0] = 0.0;
283            matrix[4] = 0.0;
284            matrix[8] = 0.0;
285            let dist_01 = unsafe {
286                euclidean_distance_f64_specialized(
287                    points.row(0).as_slice().unwrap_or(&[]),
288                    points.row(1).as_slice().unwrap_or(&[]),
289                )
290            };
291            let dist_02 = unsafe {
292                euclidean_distance_f64_specialized(
293                    points.row(0).as_slice().unwrap_or(&[]),
294                    points.row(2).as_slice().unwrap_or(&[]),
295                )
296            };
297            let dist_12 = unsafe {
298                euclidean_distance_f64_specialized(
299                    points.row(1).as_slice().unwrap_or(&[]),
300                    points.row(2).as_slice().unwrap_or(&[]),
301                )
302            };
303            matrix[1] = dist_01;
304            matrix[3] = dist_01;
305            matrix[2] = dist_02;
306            matrix[6] = dist_02;
307            matrix[5] = dist_12;
308            matrix[7] = dist_12;
309        }
310        4 => {
311            for i in 0..4 {
312                matrix[i * 4 + i] = 0.0;
313            }
314            for i in 0..3 {
315                for j in (i + 1)..4 {
316                    let dist = unsafe {
317                        euclidean_distance_f64_specialized(
318                            points.row(i).as_slice().unwrap_or(&[]),
319                            points.row(j).as_slice().unwrap_or(&[]),
320                        )
321                    };
322                    matrix[i * 4 + j] = dist;
323                    matrix[j * 4 + i] = dist;
324                }
325            }
326        }
327        _ => {
328            return pdist_simd_flat_f64_impl(points);
329        }
330    }
331    matrix
332}
333/// Public interface that dispatches to the best available implementation
334pub fn pdist_simd_flat_f64(points: &Array2<f64>) -> Vec<f64> {
335    let n = points.nrows();
336    if n <= 4 {
337        return pdist_small_matrix_f64(points);
338    }
339    #[cfg(target_arch = "x86_64")]
340    {
341        let capabilities = PlatformCapabilities::detect();
342        if capabilities.avx2_available {
343            unsafe { pdist_simd_flat_f64_avx2(points) }
344        } else {
345            pdist_simd_flat_f64_fallback(points)
346        }
347    }
348    #[cfg(target_arch = "aarch64")]
349    {
350        let capabilities = PlatformCapabilities::detect();
351        if capabilities.neon_available {
352            unsafe { pdist_simd_flat_f64_neon(points) }
353        } else {
354            pdist_simd_flat_f64_fallback(points)
355        }
356    }
357    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
358    {
359        pdist_simd_flat_f64_fallback(points)
360    }
361}
362/// Core computation kernel shared by all implementations
363#[inline(always)]
364fn pdist_simd_flat_f64_core(points: &Array2<f64>, matrix: &mut [f64]) -> Vec<f64> {
365    let n = points.nrows();
366    const CACHE_BLOCK_SIZE: usize = 64;
367    for i_block in (0..n).step_by(CACHE_BLOCK_SIZE) {
368        let i_end = (i_block + CACHE_BLOCK_SIZE).min(n);
369        for j_block in (i_block..n).step_by(CACHE_BLOCK_SIZE) {
370            let j_end = (j_block + CACHE_BLOCK_SIZE).min(n);
371            for i in i_block..i_end {
372                if i + 1 < i_end {
373                    let next_row = points.row(i + 1);
374                    let next_slice = next_row.as_slice().unwrap_or(&[]);
375                    streaming_load_hint(next_slice);
376                    prefetch_read(next_slice);
377                }
378                if i + 2 < i_end {
379                    let future_base = (i + 2) * n;
380                    if future_base + n <= matrix.len() {
381                        let write_region = &matrix[future_base..future_base + n.min(64)];
382                        streaming_load_hint(write_region);
383                        prefetch_read(write_region);
384                    }
385                }
386                let current_row = points.row(i);
387                let i_n = i * n;
388                for j in j_block.max(i)..j_end {
389                    let distance = if i == j {
390                        0.0f64
391                    } else {
392                        let row_j = points.row(j);
393                        unsafe {
394                            euclidean_distance_f64_specialized(
395                                current_row.as_slice().unwrap_or(&[]),
396                                row_j.as_slice().unwrap_or(&[]),
397                            )
398                        }
399                    };
400                    let flat_idx_ij = i_n + j;
401                    let flat_idx_ji = j * n + i;
402                    unsafe {
403                        *matrix.get_unchecked_mut(flat_idx_ij) = distance;
404                        *matrix.get_unchecked_mut(flat_idx_ji) = distance;
405                    }
406                }
407            }
408        }
409    }
410    matrix.to_vec()
411}
412/// Ultra-optimized distance computation for small, fixed-size vectors
413///
414/// This function uses const generics to enable aggressive compile-time optimizations
415/// for common small dimensions (2D, 3D, 4D, etc.).
416#[inline(always)]
417pub fn euclidean_distance_fixed<const N: usize>(a: &[f64; N], b: &[f64; N]) -> f64 {
418    let mut sum = 0.0f64;
419    match N {
420        1 => {
421            let diff = a[0] - b[0];
422            sum = diff * diff;
423        }
424        2 => {
425            let diff0 = a[0] - b[0];
426            let diff1 = a[1] - b[1];
427            sum = diff0 * diff0 + diff1 * diff1;
428        }
429        3 => {
430            let diff0 = a[0] - b[0];
431            let diff1 = a[1] - b[1];
432            let diff2 = a[2] - b[2];
433            sum = diff0 * diff0 + diff1 * diff1 + diff2 * diff2;
434        }
435        4 => {
436            let diff0 = a[0] - b[0];
437            let diff1 = a[1] - b[1];
438            let diff2 = a[2] - b[2];
439            let diff3 = a[3] - b[3];
440            sum = diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
441        }
442        _ => {
443            for i in 0..N {
444                let diff = a[i] - b[i];
445                sum = fma_f64(diff, diff, sum);
446            }
447        }
448    }
449    sum.sqrt()
450}
451/// Hierarchical clustering-aware distance computation with compiler optimizations
452///
453/// This algorithm exploits spatial locality in clustered datasets by:
454/// 1. Pre-sorting points by Morton codes (Z-order curve)
455/// 2. Computing distances in spatial order to maximize cache hits
456/// 3. Using early termination for sparse distance matrices
457#[inline(always)]
458#[must_use]
459pub fn pdist_hierarchical_f64(points: &Array2<f64>) -> Vec<f64> {
460    let n = points.nrows();
461    let mut matrix = vec![0.0f64; n * n];
462    if n <= 4 {
463        return pdist_small_matrix_f64(points);
464    }
465    let mut morton_indices: Vec<(u64, usize)> = Vec::with_capacity(n);
466    for i in 0..n {
467        let row = points.row(i);
468        let morton_code =
469            compute_morton_code_2d((row[0] * 1024.0) as u32, (row[1] * 1024.0) as u32);
470        morton_indices.push((morton_code, i));
471    }
472    morton_indices.sort_unstable_by_key(|&(code, _)| code);
473    let sorted_indices: Vec<usize> = morton_indices.iter().map(|(_, idx)| *idx).collect();
474    for (i_morton, &i) in sorted_indices.iter().enumerate() {
475        for (j_morton, &j) in sorted_indices.iter().enumerate().skip(i_morton) {
476            let distance = if i == j {
477                0.0f64
478            } else {
479                unsafe {
480                    euclidean_distance_f64_specialized(
481                        points.row(i).as_slice().unwrap_or(&[]),
482                        points.row(j).as_slice().unwrap_or(&[]),
483                    )
484                }
485            };
486            matrix[i * n + j] = distance;
487            matrix[j * n + i] = distance;
488        }
489    }
490    matrix
491}
492/// Compute Morton code (Z-order curve) for 2D spatial ordering with compiler optimizations
493#[inline(always)]
494#[must_use]
495fn compute_morton_code_2d(x: u32, y: u32) -> u64 {
496    let mut result = 0u64;
497    for i in 0..16 {
498        result |= ((x & (1 << i)) as u64) << (2 * i);
499        result |= ((y & (1 << i)) as u64) << (2 * i + 1);
500    }
501    result
502}
scirs2_spatial/distance/functions_2.rs

scirs2_spatial/distance/
functions_2.rs