Skip to main content

rig_core/embeddings/
distance.rs

1//! Distance and similarity helpers for embedding vectors.
2//!
3//! The [`VectorDistance`] implementation for [`Embedding`](crate::embeddings::Embedding)
4//! uses iterator-based calculations by default and switches to Rayon-backed
5//! parallel iterators when the `rayon` feature is enabled.
6
/// Pairwise distance and similarity measures between two vectors of the same type.
///
/// Every method compares `self` against `other` and reports the result as an `f64`.
pub trait VectorDistance {
    /// Returns the dot product (sum of element-wise products) of the two vectors.
    fn dot_product(&self, other: &Self) -> f64;

    /// Returns the cosine similarity of the two vectors.
    ///
    /// Pass `normalized = true` when both vectors are already unit length: the
    /// dot product is then returned directly, without dividing by the magnitudes.
    fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64;

    /// Returns the angular distance of the two vectors.
    ///
    /// `normalized` has the same meaning as in
    /// [`cosine_similarity`](VectorDistance::cosine_similarity).
    fn angular_distance(&self, other: &Self, normalized: bool) -> f64;

    /// Returns the Euclidean (L2) distance between the two vectors.
    fn euclidean_distance(&self, other: &Self) -> f64;

    /// Returns the Manhattan (L1) distance between the two vectors.
    fn manhattan_distance(&self, other: &Self) -> f64;

    /// Returns the Chebyshev (L-infinity) distance between the two vectors.
    fn chebyshev_distance(&self, other: &Self) -> f64;
}
28
29#[cfg(not(feature = "rayon"))]
30impl VectorDistance for crate::embeddings::Embedding {
31    fn dot_product(&self, other: &Self) -> f64 {
32        self.vec
33            .iter()
34            .zip(other.vec.iter())
35            .map(|(x, y)| x * y)
36            .sum()
37    }
38
39    fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64 {
40        let dot_product = self.dot_product(other);
41
42        if normalized {
43            dot_product
44        } else {
45            let magnitude1: f64 = self.vec.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
46            let magnitude2: f64 = other.vec.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
47
48            dot_product / (magnitude1 * magnitude2)
49        }
50    }
51
52    fn angular_distance(&self, other: &Self, normalized: bool) -> f64 {
53        let cosine_sim = self.cosine_similarity(other, normalized);
54        cosine_sim.acos() / std::f64::consts::PI
55    }
56
57    fn euclidean_distance(&self, other: &Self) -> f64 {
58        self.vec
59            .iter()
60            .zip(other.vec.iter())
61            .map(|(x, y)| (x - y).powi(2))
62            .sum::<f64>()
63            .sqrt()
64    }
65
66    fn manhattan_distance(&self, other: &Self) -> f64 {
67        self.vec
68            .iter()
69            .zip(other.vec.iter())
70            .map(|(x, y)| (x - y).abs())
71            .sum()
72    }
73
74    fn chebyshev_distance(&self, other: &Self) -> f64 {
75        self.vec
76            .iter()
77            .zip(other.vec.iter())
78            .map(|(x, y)| (x - y).abs())
79            .fold(0.0, f64::max)
80    }
81}
82
/// Rayon-backed implementation, compiled when the `rayon` feature is enabled.
#[cfg(feature = "rayon")]
mod rayon {
    use crate::embeddings::{Embedding, distance::VectorDistance};
    use rayon::prelude::*;

    /// Parallel counterpart of the iterator-based implementation.
    ///
    /// The pairwise methods zip the two vectors, so when the lengths differ
    /// only the common prefix is compared.
    impl VectorDistance for Embedding {
        /// Sum of the element-wise products of the two vectors.
        fn dot_product(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| x * y)
                .sum()
        }

        /// Dot product divided by the product of the two Euclidean magnitudes.
        ///
        /// When `normalized` is true the vectors are taken to be unit length
        /// and the dot product is returned as-is. An all-zero vector makes the
        /// denominator zero, so the result is NaN in that case.
        fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64 {
            let dot_product = self.dot_product(other);

            if normalized {
                return dot_product;
            }

            let magnitude1: f64 = self.vec.par_iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
            let magnitude2: f64 = other.vec.par_iter().map(|x| x.powi(2)).sum::<f64>().sqrt();

            dot_product / (magnitude1 * magnitude2)
        }

        /// Arc cosine of the cosine similarity, divided by π, giving a value
        /// in `[0, 1]` (0 = same direction, 1 = opposite direction).
        ///
        /// A NaN cosine similarity (e.g. from an all-zero vector) yields NaN.
        fn angular_distance(&self, other: &Self, normalized: bool) -> f64 {
            // Floating-point rounding can leave the cosine of (anti)parallel
            // vectors a hair outside [-1, 1], where `acos` returns NaN. Clamp
            // into the valid domain first; `clamp` passes NaN through, so
            // genuinely undefined inputs stay NaN.
            let cosine_sim = self.cosine_similarity(other, normalized).clamp(-1.0, 1.0);
            cosine_sim.acos() / std::f64::consts::PI
        }

        /// Square root of the sum of squared element-wise differences.
        fn euclidean_distance(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).powi(2))
                .sum::<f64>()
                .sqrt()
        }

        /// Sum of the absolute element-wise differences.
        fn manhattan_distance(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).abs())
                .sum()
        }

        /// Largest absolute element-wise difference (0.0 for empty vectors).
        fn chebyshev_distance(&self, other: &Self) -> f64 {
            // Parallel like the sibling methods. `max` is associative and
            // commutative and every partial reduction starts from the 0.0
            // identity, so the result matches a sequential `fold(0.0, f64::max)`.
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).abs())
                .reduce(|| 0.0, f64::max)
        }
    }
}
141
#[cfg(test)]
mod tests {
    use super::VectorDistance;
    use crate::embeddings::Embedding;

    /// Wraps `values` in an [`Embedding`] with a placeholder document.
    fn embedding(values: &[f64]) -> Embedding {
        Embedding {
            document: "test".to_string(),
            vec: values.to_vec(),
        }
    }

    /// The pair of three-dimensional vectors shared by every test below.
    fn fixture() -> (Embedding, Embedding) {
        (embedding(&[1.0, 2.0, 3.0]), embedding(&[1.0, 5.0, 7.0]))
    }

    #[test]
    fn test_dot_product() {
        let (lhs, rhs) = fixture();
        let actual = lhs.dot_product(&rhs);

        assert_eq!(actual, 32.0)
    }

    #[test]
    fn test_cosine_similarity() {
        let (lhs, rhs) = fixture();
        let actual = lhs.cosine_similarity(&rhs, false);

        assert_eq!(actual, 0.9875414397573881)
    }

    #[test]
    fn test_angular_distance() {
        let (lhs, rhs) = fixture();
        let actual = lhs.angular_distance(&rhs, false);

        assert_eq!(actual, 0.0502980301830343)
    }

    #[test]
    fn test_euclidean_distance() {
        let (lhs, rhs) = fixture();
        let actual = lhs.euclidean_distance(&rhs);

        assert_eq!(actual, 5.0)
    }

    #[test]
    fn test_manhattan_distance() {
        let (lhs, rhs) = fixture();
        let actual = lhs.manhattan_distance(&rhs);

        assert_eq!(actual, 7.0)
    }

    #[test]
    fn test_chebyshev_distance() {
        let (lhs, rhs) = fixture();
        let actual = lhs.chebyshev_distance(&rhs);

        assert_eq!(actual, 4.0)
    }
}