// rig/embeddings/distance.rs

/// Distance and similarity measures between two values of the same vector type.
///
/// Every method compares `self` with `other` and returns the result as an `f64`.
pub trait VectorDistance {
    /// Dot product: the sum of the pairwise products of the components.
    fn dot_product(&self, other: &Self) -> f64;

    /// Cosine similarity between `self` and `other`.
    ///
    /// `normalized` tells the implementation whether both vectors can be treated as
    /// already having unit length. The `Embedding` implementations in this file then
    /// return the plain dot product and skip the magnitude computation.
    fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64;

    /// Angular distance: the arccosine of the cosine similarity, divided by π.
    ///
    /// `normalized` has the same meaning as in [`VectorDistance::cosine_similarity`].
    fn angular_distance(&self, other: &Self, normalized: bool) -> f64;

    /// Euclidean (L2) distance: square root of the sum of squared component differences.
    fn euclidean_distance(&self, other: &Self) -> f64;

    /// Manhattan (L1) distance: sum of the absolute component differences.
    fn manhattan_distance(&self, other: &Self) -> f64;

    /// Chebyshev (L∞) distance: the largest absolute component difference.
    fn chebyshev_distance(&self, other: &Self) -> f64;
}
21
22#[cfg(not(feature = "rayon"))]
23impl VectorDistance for crate::embeddings::Embedding {
24 fn dot_product(&self, other: &Self) -> f64 {
25 self.vec
26 .iter()
27 .zip(other.vec.iter())
28 .map(|(x, y)| x * y)
29 .sum()
30 }
31
32 fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64 {
33 let dot_product = self.dot_product(other);
34
35 if normalized {
36 dot_product
37 } else {
38 let magnitude1: f64 = self.vec.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
39 let magnitude2: f64 = other.vec.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
40
41 dot_product / (magnitude1 * magnitude2)
42 }
43 }
44
45 fn angular_distance(&self, other: &Self, normalized: bool) -> f64 {
46 let cosine_sim = self.cosine_similarity(other, normalized);
47 cosine_sim.acos() / std::f64::consts::PI
48 }
49
50 fn euclidean_distance(&self, other: &Self) -> f64 {
51 self.vec
52 .iter()
53 .zip(other.vec.iter())
54 .map(|(x, y)| (x - y).powi(2))
55 .sum::<f64>()
56 .sqrt()
57 }
58
59 fn manhattan_distance(&self, other: &Self) -> f64 {
60 self.vec
61 .iter()
62 .zip(other.vec.iter())
63 .map(|(x, y)| (x - y).abs())
64 .sum()
65 }
66
67 fn chebyshev_distance(&self, other: &Self) -> f64 {
68 self.vec
69 .iter()
70 .zip(other.vec.iter())
71 .map(|(x, y)| (x - y).abs())
72 .fold(0.0, f64::max)
73 }
74}
75
/// Parallel implementation, compiled when the `rayon` feature is enabled.
#[cfg(feature = "rayon")]
mod rayon {
    use crate::embeddings::{Embedding, distance::VectorDistance};
    use rayon::prelude::*;

    impl VectorDistance for Embedding {
        /// Sums the pairwise products of the components using parallel iterators.
        fn dot_product(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| x * y)
                .sum()
        }

        /// Cosine similarity of the two vectors.
        ///
        /// With `normalized == true` the dot product is returned unchanged. Otherwise
        /// the dot product is divided by the product of the two Euclidean magnitudes;
        /// if either magnitude is zero, that division is `0.0 / 0.0` and the result
        /// is NaN.
        fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64 {
            let dot_product = self.dot_product(other);

            if normalized {
                dot_product
            } else {
                let magnitude1: f64 = self.vec.par_iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
                let magnitude2: f64 = other.vec.par_iter().map(|x| x.powi(2)).sum::<f64>().sqrt();

                dot_product / (magnitude1 * magnitude2)
            }
        }

        /// Angular distance: `acos(cosine_similarity) / π`, in `[0, 1]` for non-NaN input.
        ///
        /// A NaN cosine similarity propagates to a NaN result.
        fn angular_distance(&self, other: &Self, normalized: bool) -> f64 {
            // Floating-point rounding can push the cosine marginally outside [-1, 1]
            // (e.g. when comparing a vector with itself); `acos` returns NaN there,
            // so clamp back into its domain first.
            let cosine_sim = self
                .cosine_similarity(other, normalized)
                .clamp(-1.0, 1.0);
            cosine_sim.acos() / std::f64::consts::PI
        }

        /// Square root of the sum of squared component differences.
        fn euclidean_distance(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).powi(2))
                .sum::<f64>()
                .sqrt()
        }

        /// Sum of the absolute component differences.
        fn manhattan_distance(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).abs())
                .sum()
        }

        /// Largest absolute component difference (`0.0` when there are no components).
        fn chebyshev_distance(&self, other: &Self) -> f64 {
            // Parallel like the other methods in this module. `0.0` is a valid
            // identity for `max` here because every mapped value is an absolute
            // difference and therefore non-negative.
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).abs())
                .reduce(|| 0.0, f64::max)
        }
    }
}
134
#[cfg(test)]
mod tests {
    use super::VectorDistance;
    use crate::embeddings::Embedding;

    /// Absolute tolerance for results that are not exactly representable as `f64`.
    const TOLERANCE: f64 = 1e-12;

    /// Asserts that `actual` is within [`TOLERANCE`] of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < TOLERANCE,
            "expected {expected}, got {actual}"
        );
    }

    /// Builds the fixture pair `[1, 2, 3]` and `[1, 5, 7]` shared by all tests.
    fn embeddings() -> (Embedding, Embedding) {
        let embedding_1 = Embedding {
            document: "test".to_string(),
            vec: vec![1.0, 2.0, 3.0],
        };

        let embedding_2 = Embedding {
            document: "test".to_string(),
            vec: vec![1.0, 5.0, 7.0],
        };

        (embedding_1, embedding_2)
    }

    #[test]
    fn test_dot_product() {
        let (embedding_1, embedding_2) = embeddings();

        // 1*1 + 2*5 + 3*7 is exactly representable, so exact equality is safe.
        assert_eq!(embedding_1.dot_product(&embedding_2), 32.0);
    }

    #[test]
    fn test_cosine_similarity() {
        let (embedding_1, embedding_2) = embeddings();

        // Irrational result: compare with a tolerance instead of bit-for-bit.
        assert_close(
            embedding_1.cosine_similarity(&embedding_2, false),
            0.9875414397573881,
        );
    }

    #[test]
    fn test_angular_distance() {
        let (embedding_1, embedding_2) = embeddings();

        // Irrational result: compare with a tolerance instead of bit-for-bit.
        assert_close(
            embedding_1.angular_distance(&embedding_2, false),
            0.0502980301830343,
        );
    }

    #[test]
    fn test_euclidean_distance() {
        let (embedding_1, embedding_2) = embeddings();

        // sqrt(0 + 9 + 16) is exactly 5.
        assert_eq!(embedding_1.euclidean_distance(&embedding_2), 5.0);
    }

    #[test]
    fn test_manhattan_distance() {
        let (embedding_1, embedding_2) = embeddings();

        assert_eq!(embedding_1.manhattan_distance(&embedding_2), 7.0);
    }

    #[test]
    fn test_chebyshev_distance() {
        let (embedding_1, embedding_2) = embeddings();

        assert_eq!(embedding_1.chebyshev_distance(&embedding_2), 4.0);
    }
}