// rig_core/embeddings/distance.rs

/// Similarity and distance measures between two values of the same vector-like type.
///
/// Every method compares `self` against `other` and returns the result as an `f64`.
/// Implementors decide how components are paired up; the methods take no explicit
/// dimension argument, so callers are expected to pass vectors of matching length.
pub trait VectorDistance {
    /// Returns the dot (inner) product of `self` and `other`.
    fn dot_product(&self, other: &Self) -> f64;

    /// Returns the cosine similarity between `self` and `other`.
    ///
    /// `normalized` tells the implementation whether both vectors are already
    /// unit length. When it is `true`, an implementation may skip the division by
    /// the magnitudes and use the dot product directly.
    fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64;

    /// Returns the angular distance between `self` and `other`: the arc cosine of
    /// the cosine similarity, divided by π.
    ///
    /// `normalized` has the same meaning as in [`VectorDistance::cosine_similarity`].
    fn angular_distance(&self, other: &Self, normalized: bool) -> f64;

    /// Returns the Euclidean (L2) distance: the square root of the summed squared
    /// component differences.
    fn euclidean_distance(&self, other: &Self) -> f64;

    /// Returns the Manhattan (L1) distance: the sum of the absolute component
    /// differences.
    fn manhattan_distance(&self, other: &Self) -> f64;

    /// Returns the Chebyshev (L∞) distance: the largest absolute component
    /// difference.
    fn chebyshev_distance(&self, other: &Self) -> f64;
}
28
29#[cfg(not(feature = "rayon"))]
30impl VectorDistance for crate::embeddings::Embedding {
31 fn dot_product(&self, other: &Self) -> f64 {
32 self.vec
33 .iter()
34 .zip(other.vec.iter())
35 .map(|(x, y)| x * y)
36 .sum()
37 }
38
39 fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64 {
40 let dot_product = self.dot_product(other);
41
42 if normalized {
43 dot_product
44 } else {
45 let magnitude1: f64 = self.vec.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
46 let magnitude2: f64 = other.vec.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
47
48 dot_product / (magnitude1 * magnitude2)
49 }
50 }
51
52 fn angular_distance(&self, other: &Self, normalized: bool) -> f64 {
53 let cosine_sim = self.cosine_similarity(other, normalized);
54 cosine_sim.acos() / std::f64::consts::PI
55 }
56
57 fn euclidean_distance(&self, other: &Self) -> f64 {
58 self.vec
59 .iter()
60 .zip(other.vec.iter())
61 .map(|(x, y)| (x - y).powi(2))
62 .sum::<f64>()
63 .sqrt()
64 }
65
66 fn manhattan_distance(&self, other: &Self) -> f64 {
67 self.vec
68 .iter()
69 .zip(other.vec.iter())
70 .map(|(x, y)| (x - y).abs())
71 .sum()
72 }
73
74 fn chebyshev_distance(&self, other: &Self) -> f64 {
75 self.vec
76 .iter()
77 .zip(other.vec.iter())
78 .map(|(x, y)| (x - y).abs())
79 .fold(0.0, f64::max)
80 }
81}
82
/// Parallel implementation, compiled when the `rayon` feature is enabled.
#[cfg(feature = "rayon")]
mod rayon {
    use crate::embeddings::{Embedding, distance::VectorDistance};
    use rayon::prelude::*;

    /// Same measures as the sequential implementation, computed with rayon
    /// parallel iterators (except `chebyshev_distance`, see below).
    ///
    /// Parallel floating-point sums may combine terms in a different order from
    /// run to run, so the lowest bits of a result are not guaranteed to be stable.
    impl VectorDistance for Embedding {
        /// Sums the element-wise products of the two vectors in parallel.
        fn dot_product(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| x * y)
                .sum()
        }

        /// Cosine similarity of the two vectors.
        ///
        /// With `normalized == true` the dot product is returned as-is (correct
        /// only when both vectors have unit length). Otherwise the dot product is
        /// divided by the product of the two Euclidean norms; if either vector
        /// has zero magnitude that division is `0.0 / 0.0` and the result is NaN.
        fn cosine_similarity(&self, other: &Self, normalized: bool) -> f64 {
            let dot_product = self.dot_product(other);

            if normalized {
                dot_product
            } else {
                let magnitude1: f64 = self.vec.par_iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
                let magnitude2: f64 = other.vec.par_iter().map(|x| x.powi(2)).sum::<f64>().sqrt();

                dot_product / (magnitude1 * magnitude2)
            }
        }

        /// Angular distance: `acos(cosine_similarity) / π`, so the result lies in
        /// `[0, 1]` (or is NaN when the cosine similarity itself is NaN).
        fn angular_distance(&self, other: &Self, normalized: bool) -> f64 {
            let cosine_sim = self.cosine_similarity(other, normalized);

            // Rounding error (or `normalized = true` on vectors that are only
            // approximately unit length) can push the similarity marginally
            // outside [-1, 1], where `acos` returns NaN. Clamp first; `clamp`
            // leaves NaN untouched, so an undefined similarity still shows as NaN.
            cosine_sim.clamp(-1.0, 1.0).acos() / std::f64::consts::PI
        }

        /// Square root of the summed squared component differences.
        fn euclidean_distance(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).powi(2))
                .sum::<f64>()
                .sqrt()
        }

        /// Sum of the absolute component differences.
        fn manhattan_distance(&self, other: &Self) -> f64 {
            self.vec
                .par_iter()
                .zip(other.vec.par_iter())
                .map(|(x, y)| (x - y).abs())
                .sum()
        }

        /// Largest absolute component difference; two empty vectors yield `0.0`.
        ///
        /// NOTE(review): this is the only method here that iterates sequentially
        /// (`iter`, not `par_iter`). Presumably because the std-style
        /// `fold(0.0, f64::max)` does not carry over to parallel iterators
        /// unchanged — confirm whether a parallel reduction is wanted.
        fn chebyshev_distance(&self, other: &Self) -> f64 {
            self.vec
                .iter()
                .zip(other.vec.iter())
                .map(|(x, y)| (x - y).abs())
                .fold(0.0, f64::max)
        }
    }
}
141
/// Unit tests for the `VectorDistance` implementation on `Embedding`.
///
/// Results are compared with a small absolute tolerance instead of exact `f64`
/// equality, so the suite does not depend on the precise order in which terms
/// are summed (the `rayon` implementation sums in parallel).
#[cfg(test)]
mod tests {
    use super::VectorDistance;
    use crate::embeddings::Embedding;

    /// Maximum absolute difference accepted between a computed and an expected value.
    const TOLERANCE: f64 = 1e-12;

    /// Panics unless `actual` is within `TOLERANCE` of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() <= TOLERANCE,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    /// Builds the fixture pair `[1, 2, 3]` and `[1, 5, 7]` shared by every test.
    fn embeddings() -> (Embedding, Embedding) {
        let embedding_1 = Embedding {
            document: "test".to_string(),
            vec: vec![1.0, 2.0, 3.0],
        };

        let embedding_2 = Embedding {
            document: "test".to_string(),
            vec: vec![1.0, 5.0, 7.0],
        };

        (embedding_1, embedding_2)
    }

    #[test]
    fn test_dot_product() {
        let (embedding_1, embedding_2) = embeddings();

        assert_close(embedding_1.dot_product(&embedding_2), 32.0);
    }

    #[test]
    fn test_cosine_similarity() {
        let (embedding_1, embedding_2) = embeddings();

        assert_close(
            embedding_1.cosine_similarity(&embedding_2, false),
            0.9875414397573881,
        );
    }

    #[test]
    fn test_angular_distance() {
        let (embedding_1, embedding_2) = embeddings();

        assert_close(
            embedding_1.angular_distance(&embedding_2, false),
            0.0502980301830343,
        );
    }

    #[test]
    fn test_euclidean_distance() {
        let (embedding_1, embedding_2) = embeddings();

        assert_close(embedding_1.euclidean_distance(&embedding_2), 5.0);
    }

    #[test]
    fn test_manhattan_distance() {
        let (embedding_1, embedding_2) = embeddings();

        assert_close(embedding_1.manhattan_distance(&embedding_2), 7.0);
    }

    #[test]
    fn test_chebyshev_distance() {
        let (embedding_1, embedding_2) = embeddings();

        assert_close(embedding_1.chebyshev_distance(&embedding_2), 4.0);
    }
}