diskann_vector/
lib.rs

1/*
2 * Copyright (c) Microsoft Corporation.
3 * Licensed under the MIT license.
4 */
5//! # vector
6//!
//! This crate contains SIMD accelerated functions for operating on vector data. Note that the name 'vector'
//! does not exclusively mean embedding vectors, but any array of data appropriate for SIMD. Therefore, aside
//! from fast implementations of distance for real vectors, this crate also includes things like SIMD
//! accelerated `contains` for slices.
11#![cfg_attr(
12    not(test),
13    warn(
14        clippy::panic,
15        clippy::unwrap_used,
16        clippy::expect_used,
17        clippy::undocumented_unsafe_blocks
18    )
19)]
20
21mod half;
22pub use half::Half;
23
24mod traits;
25pub use traits::{DistanceFunction, Norm, PreprocessedDistanceFunction, PureDistanceFunction};
26
27mod value;
28pub use value::{MathematicalValue, SimilarityScore};
29
30pub mod contains;
31pub mod conversion;
32pub mod distance;
33pub mod norm;
34
35cfg_if::cfg_if! {
36    if #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] {
37        const CACHE_LINE_SIZE: usize = 64;
38
39        #[inline(always)]
40        unsafe fn prefetch_exactly<const N: usize>(ptr: *const i8) {
41            use std::arch::x86_64::*;
42            for i in 0..N {
43                _mm_prefetch(ptr.add(i * CACHE_LINE_SIZE), _MM_HINT_T0);
44            }
45        }
46
47        #[inline(always)]
48        unsafe fn prefetch_at_most<const N: usize>(ptr: *const i8, bytes: usize) {
49            use std::arch::x86_64::*;
50            for i in 0..N {
51                if CACHE_LINE_SIZE * i >= bytes {
52                    break;
53                }
54                _mm_prefetch(ptr.add(i * CACHE_LINE_SIZE), _MM_HINT_T0);
55            }
56        }
57
58        /// Prefetch the given vector in chunks of 64 bytes, which is a cache line size.
59        /// Only the first `MAX_BLOCKS` chunks will be prefetched.
60        #[inline]
61        pub fn prefetch_hint_max<const MAX_CACHE_LINES: usize, T>(vec: &[T]) {
62            let vecsize = std::mem::size_of_val(vec);
63            if vecsize >= MAX_CACHE_LINES * 64 {
64                // SAFETY: Pointer is in-bounds and use of the intrinsic is cfg gated.
65                unsafe { prefetch_exactly::<MAX_CACHE_LINES>(vec.as_ptr().cast()) }
66            } else {
67                // SAFETY: Pointer is in-bounds and use of the intrinsic is cfg gated.
68                unsafe { prefetch_at_most::<MAX_CACHE_LINES>(vec.as_ptr().cast(), vecsize) }
69            }
70        }
71
72        /// Prefetch the given vector in chunks of 64 bytes, which is a cache line size.
73        /// The entire vector will be prefetched.
74        #[inline]
75        pub fn prefetch_hint_all<T>(vec: &[T]) {
76            use std::arch::x86_64::*;
77
78            let vecsize = std::mem::size_of_val(vec);
79            let num_prefetch_blocks = vecsize.div_ceil(64);
80            let vec_ptr = vec.as_ptr() as *const i8;
81            for d in 0..num_prefetch_blocks {
82                // SAFETY: Pointer is in-bounds and use of the intrinsic is gated by the
83                // `cfg`-guard on this function.
84                unsafe {
85                    std::arch::x86_64::_mm_prefetch(vec_ptr.add(d * CACHE_LINE_SIZE), _MM_HINT_T0);
86                }
87            }        }
88    } else {
89        pub fn prefetch_hint_max<const MAX_CACHE_LINES: usize, T>(_vec: &[T]) {}
90        pub fn prefetch_hint_all<T>(_vec: &[T]) {}
91    }
92}
93
94#[cfg(test)]
95mod test_util;