// nodedb_vector/quantize/binary.rs

/// Sign-quantizes a float vector into a packed bit string.
///
/// Each component contributes exactly one bit: `1` when the value is
/// strictly greater than `0.0`, `0` otherwise (so `0.0`, `-0.0`, negatives
/// and NaN all map to `0`). Bits are packed least-significant-bit first, so
/// component `i` lands in byte `i / 8` at bit position `i % 8`.
///
/// The result holds `vector.len().div_ceil(8)` bytes; unused high bits of the
/// final byte are left as zero. An empty input yields an empty `Vec`.
pub fn encode(vector: &[f32]) -> Vec<u8> {
    vector
        // Every group of up to eight components becomes one output byte.
        .chunks(8)
        .map(|group| {
            group
                .iter()
                .enumerate()
                .filter(|&(_, &component)| component > 0.0)
                .fold(0u8, |byte, (bit, _)| byte | (1u8 << bit))
        })
        .collect()
}
25
26pub fn encode_batch(vectors: &[&[f32]], dim: usize) -> Vec<u8> {
30 let bytes_per = dim.div_ceil(8);
31 let mut out = Vec::with_capacity(bytes_per * vectors.len());
32 for v in vectors {
33 out.extend(encode(v));
34 }
35 out
36}
37
/// Counts the bit positions at which two packed bit strings differ.
///
/// Bytes are compared pairwise and the set bits of each XOR are summed.
///
/// # Panics
///
/// With debug assertions enabled, panics when the slices differ in length.
/// Without them, still panics if `b` is shorter than `a`; extra trailing
/// bytes in `b` are ignored.
#[inline]
pub fn hamming_distance(a: &[u8], b: &[u8]) -> u32 {
    debug_assert_eq!(a.len(), b.len());
    // Restrict `b` to `a`'s length up front: a too-short `b` fails here
    // instead of being silently truncated by `zip`.
    let b = &b[..a.len()];
    a.iter().zip(b).map(|(x, y)| (x ^ y).count_ones()).sum()
}
51
/// Word-at-a-time variant of the Hamming distance over packed bit strings.
///
/// The inputs are walked in 8-byte groups; each group is loaded as a
/// little-endian `u64`, XORed with its counterpart, and its set bits counted.
/// Any trailing bytes that do not fill a whole group (fewer than eight) are
/// compared byte by byte.
///
/// # Panics
///
/// With debug assertions enabled, panics when the slices differ in length.
/// Without them, still panics if `b` is shorter than `a`; extra trailing
/// bytes in `b` are ignored.
#[inline]
pub fn hamming_distance_fast(a: &[u8], b: &[u8]) -> u32 {
    /// Reads exactly eight bytes as one little-endian word.
    fn load_word(bytes: &[u8]) -> u64 {
        let mut buf = [0u8; 8];
        buf.copy_from_slice(bytes);
        u64::from_le_bytes(buf)
    }

    debug_assert_eq!(a.len(), b.len());
    // Only the first `a.len()` bytes of `b` take part; a shorter `b` fails here.
    let b = &b[..a.len()];

    let a_words = a.chunks_exact(8);
    let b_words = b.chunks_exact(8);

    // Leftover bytes past the last full 8-byte group.
    let tail: u32 = a_words
        .remainder()
        .iter()
        .zip(b_words.remainder())
        .map(|(x, y)| (x ^ y).count_ones())
        .sum();

    let body: u32 = a_words
        .zip(b_words)
        .map(|(x, y)| (load_word(x) ^ load_word(y)).count_ones())
        .sum();

    body + tail
}
81
/// Returns how many bytes are needed to hold `dim` bits, one bit per
/// dimension, rounding up to a whole byte.
pub fn binary_size(dim: usize) -> usize {
    const BITS_PER_BYTE: usize = 8;
    let full_bytes = dim / BITS_PER_BYTE;
    // A partially filled trailing byte still occupies a whole byte.
    let has_partial = dim % BITS_PER_BYTE != 0;
    full_bytes + usize::from(has_partial)
}
86
#[cfg(test)]
mod tests {
    use super::*;

    /// Strictly positive components set their bit (LSB first); zero and
    /// negative components leave it clear.
    #[test]
    fn encode_positive_negative() {
        let v = [1.0, -1.0, 1.0, -1.0, 0.0, 1.0, -0.5, 0.5];
        let bits = encode(&v);
        assert_eq!(bits.len(), 1);
        // Components 0, 2, 5 and 7 are > 0.0.
        assert_eq!(bits[0], 0b10100101);
    }

    /// A dimension that is not a multiple of eight still yields a whole byte
    /// with the unused high bits cleared.
    #[test]
    fn encode_partial_byte_is_zero_padded() {
        let bits = encode(&[1.0, -1.0, 1.0]);
        assert_eq!(bits, vec![0b0000_0101u8]);
    }

    /// An empty vector encodes to an empty byte string.
    #[test]
    fn encode_empty_is_empty() {
        let v: [f32; 0] = [];
        assert!(encode(&v).is_empty());
    }

    /// NaN and negative zero do not compare greater than `0.0`, so their
    /// bits stay clear.
    #[test]
    fn encode_nan_and_negative_zero_are_unset() {
        let bits = encode(&[f32::NAN, 1.0, -0.0, 0.0]);
        assert_eq!(bits, vec![0b0000_0010u8]);
    }

    #[test]
    fn hamming_identical_is_zero() {
        let v = [1.0, -1.0, 1.0, 0.5];
        let a = encode(&v);
        let b = encode(&v);
        assert_eq!(hamming_distance(&a, &b), 0);
    }

    #[test]
    fn hamming_opposite_is_dim() {
        let a_vec = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
        let b_vec = [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0];
        let a = encode(&a_vec);
        let b = encode(&b_vec);
        assert_eq!(hamming_distance(&a, &b), 8);
    }

    /// 128 dimensions pack into 16 bytes, i.e. two full 8-byte words and no
    /// tail, so this covers only the word loop of the fast path.
    #[test]
    fn hamming_fast_matches_simple() {
        let a_vec: Vec<f32> = (0..128)
            .map(|i| if i % 3 == 0 { 1.0 } else { -1.0 })
            .collect();
        let b_vec: Vec<f32> = (0..128)
            .map(|i| if i % 5 == 0 { 1.0 } else { -1.0 })
            .collect();
        let a = encode(&a_vec);
        let b = encode(&b_vec);

        let slow = hamming_distance(&a, &b);
        let fast = hamming_distance_fast(&a, &b);
        assert_eq!(slow, fast);
    }

    /// 100 dimensions pack into 13 bytes: one full 8-byte word plus a 5-byte
    /// tail, so both loops of the fast path are exercised.
    #[test]
    fn hamming_fast_matches_simple_with_tail() {
        let a_vec: Vec<f32> = (0..100)
            .map(|i| if i % 3 == 0 { 1.0 } else { -1.0 })
            .collect();
        let b_vec: Vec<f32> = (0..100)
            .map(|i| if i % 5 == 0 { 1.0 } else { -1.0 })
            .collect();
        let a = encode(&a_vec);
        let b = encode(&b_vec);
        assert_eq!(a.len(), 13);

        let slow = hamming_distance(&a, &b);
        let fast = hamming_distance_fast(&a, &b);
        assert_eq!(slow, fast);
        // 34 multiples of 3 and 20 multiples of 5 below 100, 7 shared
        // (multiples of 15): 34 + 20 - 2 * 7 positions differ.
        assert_eq!(slow, 40);
    }

    /// Both distance functions accept empty inputs.
    #[test]
    fn hamming_empty_is_zero() {
        let empty: [u8; 0] = [];
        assert_eq!(hamming_distance(&empty, &empty), 0);
        assert_eq!(hamming_distance_fast(&empty, &empty), 0);
    }

    #[test]
    fn high_dimensional_encoding() {
        let v: Vec<f32> = (0..768).map(|i| (i as f32).sin()).collect();
        let bits = encode(&v);
        assert_eq!(bits.len(), 96);
    }

    /// `binary_size` rounds the bit count up to whole bytes and agrees with
    /// the length `encode` actually produces.
    #[test]
    fn binary_size_rounds_up() {
        assert_eq!(binary_size(0), 0);
        assert_eq!(binary_size(1), 1);
        assert_eq!(binary_size(8), 1);
        assert_eq!(binary_size(9), 2);
        assert_eq!(binary_size(768), 96);
        assert_eq!(binary_size(9), encode(&[1.0f32; 9]).len());
    }

    #[test]
    fn batch_encode_layout() {
        let v1 = [1.0f32, -1.0, 1.0, -1.0];
        let v2 = [-1.0f32, 1.0, -1.0, 1.0];
        let batch = encode_batch(&[&v1, &v2], 4);
        assert_eq!(batch.len(), 2);
        assert_eq!(batch[0], encode(&v1)[0]);
        assert_eq!(batch[1], encode(&v2)[0]);
    }
}