manifoldb_vector/distance/
binary.rs1use crate::types::BinaryEmbedding;
19
20#[inline]
42#[must_use]
43pub fn hamming_distance(a: &BinaryEmbedding, b: &BinaryEmbedding) -> u32 {
44 debug_assert_eq!(a.dimension(), b.dimension(), "binary embeddings must have same dimension");
45
46 a.data().iter().zip(b.data().iter()).map(|(&x, &y)| (x ^ y).count_ones()).sum()
47}
48
49#[inline]
58#[must_use]
59pub fn hamming_distance_raw(a: &[u64], b: &[u64]) -> u32 {
60 debug_assert_eq!(a.len(), b.len(), "bit vectors must have same length");
61
62 a.iter().zip(b.iter()).map(|(&x, &y)| (x ^ y).count_ones()).sum()
63}
64
65#[inline]
75#[must_use]
76pub fn hamming_distance_normalized(a: &BinaryEmbedding, b: &BinaryEmbedding) -> f32 {
77 let distance = hamming_distance(a, b);
78 distance as f32 / a.dimension() as f32
79}
80
81#[inline]
95#[must_use]
96pub fn jaccard_similarity(a: &BinaryEmbedding, b: &BinaryEmbedding) -> f32 {
97 debug_assert_eq!(a.dimension(), b.dimension(), "binary embeddings must have same dimension");
98
99 let mut intersection: u32 = 0;
100 let mut union: u32 = 0;
101
102 for (&x, &y) in a.data().iter().zip(b.data().iter()) {
103 intersection += (x & y).count_ones();
104 union += (x | y).count_ones();
105 }
106
107 if union == 0 {
108 1.0 } else {
110 intersection as f32 / union as f32
111 }
112}
113
114#[inline]
122#[must_use]
123pub fn jaccard_distance(a: &BinaryEmbedding, b: &BinaryEmbedding) -> f32 {
124 1.0 - jaccard_similarity(a, b)
125}
126
127#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
129pub enum BinaryDistanceMetric {
130 Hamming,
132 HammingNormalized,
134 Jaccard,
136}
137
138impl BinaryDistanceMetric {
139 #[inline]
145 #[must_use]
146 pub fn calculate(&self, a: &BinaryEmbedding, b: &BinaryEmbedding) -> f32 {
147 match self {
148 Self::Hamming => hamming_distance(a, b) as f32,
149 Self::HammingNormalized => hamming_distance_normalized(a, b),
150 Self::Jaccard => jaccard_distance(a, b),
151 }
152 }
153}
154
155#[cfg(test)]
156mod tests {
157 use super::*;
158
159 const EPSILON: f32 = 1e-6;
160
161 fn assert_near(a: f32, b: f32, epsilon: f32) {
162 assert!(
163 (a - b).abs() < epsilon,
164 "assertion failed: {} !~ {} (diff: {})",
165 a,
166 b,
167 (a - b).abs()
168 );
169 }
170
171 #[test]
172 fn test_hamming_distance_identical() {
173 let a = BinaryEmbedding::new(vec![0xFFFF_FFFF_FFFF_FFFFu64], 64).unwrap();
174 let b = BinaryEmbedding::new(vec![0xFFFF_FFFF_FFFF_FFFFu64], 64).unwrap();
175 assert_eq!(hamming_distance(&a, &b), 0);
176 }
177
178 #[test]
179 fn test_hamming_distance_opposite() {
180 let a = BinaryEmbedding::new(vec![0x0000_0000_0000_0000u64], 64).unwrap();
181 let b = BinaryEmbedding::new(vec![0xFFFF_FFFF_FFFF_FFFFu64], 64).unwrap();
182 assert_eq!(hamming_distance(&a, &b), 64);
183 }
184
185 #[test]
186 fn test_hamming_distance_half() {
187 let a = BinaryEmbedding::new(vec![0xFFFF_FFFF_0000_0000u64], 64).unwrap();
188 let b = BinaryEmbedding::new(vec![0x0000_0000_FFFF_FFFFu64], 64).unwrap();
189 assert_eq!(hamming_distance(&a, &b), 64);
190 }
191
192 #[test]
193 fn test_hamming_distance_one_bit() {
194 let a = BinaryEmbedding::new(vec![0b0000_0001u64], 8).unwrap();
195 let b = BinaryEmbedding::new(vec![0b0000_0000u64], 8).unwrap();
196 assert_eq!(hamming_distance(&a, &b), 1);
197 }
198
199 #[test]
200 fn test_hamming_distance_multi_word() {
201 let a = BinaryEmbedding::new(vec![0xFFFF_FFFF_FFFF_FFFFu64, 0x0000_0000_0000_0000u64], 128)
202 .unwrap();
203 let b = BinaryEmbedding::new(vec![0x0000_0000_0000_0000u64, 0xFFFF_FFFF_FFFF_FFFFu64], 128)
204 .unwrap();
205 assert_eq!(hamming_distance(&a, &b), 128);
206 }
207
208 #[test]
209 fn test_hamming_distance_raw() {
210 let a = [0xFFFF_0000u64, 0x0000_FFFFu64];
211 let b = [0x0000_FFFFu64, 0xFFFF_0000u64];
212 assert_eq!(hamming_distance_raw(&a, &b), 64);
213 }
214
215 #[test]
216 fn test_hamming_distance_normalized() {
217 let a = BinaryEmbedding::zeros(100).unwrap();
218 let b = BinaryEmbedding::ones(100).unwrap();
219 assert_near(hamming_distance_normalized(&a, &b), 1.0, EPSILON);
220
221 let c = BinaryEmbedding::zeros(100).unwrap();
222 assert_near(hamming_distance_normalized(&a, &c), 0.0, EPSILON);
223 }
224
225 #[test]
226 fn test_jaccard_similarity_identical() {
227 let a = BinaryEmbedding::new(vec![0b1111_0000u64], 8).unwrap();
228 let b = BinaryEmbedding::new(vec![0b1111_0000u64], 8).unwrap();
229 assert_near(jaccard_similarity(&a, &b), 1.0, EPSILON);
230 }
231
232 #[test]
233 fn test_jaccard_similarity_disjoint() {
234 let a = BinaryEmbedding::new(vec![0b1111_0000u64], 8).unwrap();
235 let b = BinaryEmbedding::new(vec![0b0000_1111u64], 8).unwrap();
236 assert_near(jaccard_similarity(&a, &b), 0.0, EPSILON);
238 }
239
240 #[test]
241 fn test_jaccard_similarity_partial_overlap() {
242 let a = BinaryEmbedding::new(vec![0b1111_1100u64], 8).unwrap();
243 let b = BinaryEmbedding::new(vec![0b0011_1111u64], 8).unwrap();
244 assert_near(jaccard_similarity(&a, &b), 4.0 / 8.0, EPSILON);
247 }
248
249 #[test]
250 fn test_jaccard_similarity_both_zero() {
251 let a = BinaryEmbedding::zeros(64).unwrap();
252 let b = BinaryEmbedding::zeros(64).unwrap();
253 assert_near(jaccard_similarity(&a, &b), 1.0, EPSILON);
254 }
255
256 #[test]
257 fn test_jaccard_distance() {
258 let a = BinaryEmbedding::new(vec![0b1111_0000u64], 8).unwrap();
259 let b = BinaryEmbedding::new(vec![0b1111_0000u64], 8).unwrap();
260 assert_near(jaccard_distance(&a, &b), 0.0, EPSILON);
261 }
262
263 #[test]
264 fn test_binary_distance_metric_calculate() {
265 let a = BinaryEmbedding::new(vec![0b1111_0000u64], 8).unwrap();
266 let b = BinaryEmbedding::new(vec![0b1010_1010u64], 8).unwrap();
267
268 assert_near(BinaryDistanceMetric::Hamming.calculate(&a, &b), 4.0, EPSILON);
270
271 assert_near(BinaryDistanceMetric::HammingNormalized.calculate(&a, &b), 0.5, EPSILON);
273 }
274}