rig/embeddings/
distance.rs

/// Similarity and distance measures between two embedding vectors.
///
/// Every method compares `self` against `other` component by component and
/// reports the result as an `f64`.
pub trait VectorDistance {
    /// Dot product: the sum of the pairwise products of the components.
    fn dot_product(&self, other: &Self) -> f64;

    /// Cosine similarity: the dot product divided by the product of the two
    /// vector magnitudes.
    ///
    /// Pass `normalized = true` when both vectors are already unit length; the
    /// magnitudes are then skipped and the dot product is returned as is.
    fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64;

    /// Angular distance: the angle between the two vectors expressed as a
    /// fraction of π (arc cosine of the cosine similarity, divided by π).
    ///
    /// `normalized` has the same meaning as in
    /// [`cosine_similarity`](VectorDistance::cosine_similarity).
    fn angular_distance(&self, other: &Self, normalized: bool) -> f64;

    /// Euclidean (L2) distance: the square root of the sum of squared
    /// component differences.
    fn euclidean_distance(&self, other: &Self) -> f64;

    /// Manhattan (L1) distance: the sum of the absolute component differences.
    fn manhattan_distance(&self, other: &Self) -> f64;

    /// Chebyshev (L∞) distance: the largest absolute component difference.
    fn chebyshev_distance(&self, other: &Self) -> f64;
}
21
22#[cfg(not(feature = "rayon"))]
23impl VectorDistance for crate::embeddings::Embedding {
24    fn dot_product(&self, other: &Self) -> f64 {
25        self.vec
26            .iter()
27            .zip(other.vec.iter())
28            .map(|(x, y)| x * y)
29            .sum()
30    }
31
32    fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64 {
33        let dot_product = self.dot_product(other);
34
35        if normalized {
36            dot_product
37        } else {
38            let magnitude1: f64 = self.vec.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
39            let magnitude2: f64 = other.vec.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
40
41            dot_product / (magnitude1 * magnitude2)
42        }
43    }
44
45    fn angular_distance(&self, other: &Self, normalized: bool) -> f64 {
46        let cosine_sim = self.cosine_similarity(other, normalized);
47        cosine_sim.acos() / std::f64::consts::PI
48    }
49
50    fn euclidean_distance(&self, other: &Self) -> f64 {
51        self.vec
52            .iter()
53            .zip(other.vec.iter())
54            .map(|(x, y)| (x - y).powi(2))
55            .sum::<f64>()
56            .sqrt()
57    }
58
59    fn manhattan_distance(&self, other: &Self) -> f64 {
60        self.vec
61            .iter()
62            .zip(other.vec.iter())
63            .map(|(x, y)| (x - y).abs())
64            .sum()
65    }
66
67    fn chebyshev_distance(&self, other: &Self) -> f64 {
68        self.vec
69            .iter()
70            .zip(other.vec.iter())
71            .map(|(x, y)| (x - y).abs())
72            .fold(0.0, f64::max)
73    }
74}
75
/// Parallel implementation, compiled when the `rayon` feature is enabled.
///
/// Each metric walks the two vectors with rayon parallel iterators. Rayon
/// documents that the order in which a parallel `sum` combines items is not
/// specified, so floating-point results may differ in the last bits from one
/// run to the next.
#[cfg(feature = "rayon")]
mod rayon {
    use crate::embeddings::{Embedding, distance::VectorDistance};
    use rayon::prelude::*;

    impl VectorDistance for Embedding {
        /// Sum of the pairwise products of the components.
        fn dot_product(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| x * y)
                .sum()
        }

        /// Dot product divided by the product of both magnitudes.
        ///
        /// With `normalized = true` the caller asserts that both vectors are
        /// already unit length, so the dot product is returned unchanged and
        /// no magnitude is computed. If either vector has zero magnitude the
        /// division is `0.0 / 0.0` and the result is NaN.
        fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64 {
            let dot_product = self.dot_product(other);

            if normalized {
                dot_product
            } else {
                let magnitude1: f64 = self
                    .vec
                    .par_iter()
                    .map(|x| x.powi(2))
                    .sum::<f64>()
                    .sqrt();
                let magnitude2: f64 = other
                    .vec
                    .par_iter()
                    .map(|x| x.powi(2))
                    .sum::<f64>()
                    .sqrt();

                dot_product / (magnitude1 * magnitude2)
            }
        }

        /// Arc cosine of the cosine similarity, divided by π.
        ///
        /// The result is 0.0 for vectors pointing the same way and 1.0 for
        /// opposite vectors. A NaN similarity (zero-magnitude input) stays NaN.
        fn angular_distance(&self, other: &Self, normalized: bool) -> f64 {
            // Rounding can leave the similarity of (nearly) parallel vectors a
            // hair outside [-1, 1], where `acos` returns NaN; clamp it back
            // into the domain first. `clamp` passes NaN through unchanged.
            let cosine_sim = self.cosine_similarity(other, normalized).clamp(-1.0, 1.0);
            cosine_sim.acos() / std::f64::consts::PI
        }

        /// Square root of the sum of squared component differences.
        fn euclidean_distance(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).powi(2))
                .sum::<f64>()
                .sqrt()
        }

        /// Sum of the absolute component differences.
        fn manhattan_distance(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).abs())
                .sum()
        }

        /// Largest absolute component difference; 0.0 when there are no
        /// components to compare.
        fn chebyshev_distance(&self, other: &Self) -> f64 {
            // Parallel like every other metric in this module. The maximum is
            // the same whatever order rayon combines the partial results in.
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).abs())
                .reduce(|| 0.0, f64::max)
        }
    }
}
134
#[cfg(test)]
mod tests {
    use super::VectorDistance;
    use crate::embeddings::Embedding;

    /// Wraps `vec` in an embedding with a placeholder document.
    fn embedding(vec: Vec<f64>) -> Embedding {
        Embedding {
            document: "test".to_string(),
            vec,
        }
    }

    /// Fixture pair shared by every test: `[1, 2, 3]` and `[1, 5, 7]`.
    fn embeddings() -> (Embedding, Embedding) {
        (
            embedding(vec![1.0, 2.0, 3.0]),
            embedding(vec![1.0, 5.0, 7.0]),
        )
    }

    #[test]
    fn test_dot_product() {
        let (lhs, rhs) = embeddings();

        // 1*1 + 2*5 + 3*7
        assert_eq!(lhs.dot_product(&rhs), 32.0);
    }

    #[test]
    fn test_cosine_similarity() {
        let (lhs, rhs) = embeddings();
        let similarity = lhs.cosine_similarity(&rhs, false);

        assert_eq!(similarity, 0.9875414397573881);
    }

    #[test]
    fn test_angular_distance() {
        let (lhs, rhs) = embeddings();
        let distance = lhs.angular_distance(&rhs, false);

        assert_eq!(distance, 0.0502980301830343);
    }

    #[test]
    fn test_euclidean_distance() {
        let (lhs, rhs) = embeddings();

        // sqrt(0^2 + 3^2 + 4^2)
        assert_eq!(lhs.euclidean_distance(&rhs), 5.0);
    }

    #[test]
    fn test_manhattan_distance() {
        let (lhs, rhs) = embeddings();

        // 0 + 3 + 4
        assert_eq!(lhs.manhattan_distance(&rhs), 7.0);
    }

    #[test]
    fn test_chebyshev_distance() {
        let (lhs, rhs) = embeddings();

        // max(0, 3, 4)
        assert_eq!(lhs.chebyshev_distance(&rhs), 4.0);
    }
}