// nodedb_vector/quantize/binary.rs

/// Quantizes a float vector down to one sign bit per dimension.
///
/// Bit `i` of the output is set when `vector[i] > 0.0`; zero, negative and
/// NaN components all map to a cleared bit. Bits are packed LSB-first, so
/// dimension `i` lives in byte `i / 8` at bit position `i % 8`.
///
/// The result has `vector.len().div_ceil(8)` bytes; unused high bits of the
/// final byte are left at zero.
pub fn encode(vector: &[f32]) -> Vec<u8> {
    vector
        .chunks(8)
        .map(|lanes| {
            // Fold up to eight lanes into a single byte, lowest lane first.
            lanes
                .iter()
                .enumerate()
                .filter(|&(_, &component)| component > 0.0)
                .fold(0u8, |byte, (bit, _)| byte | (1u8 << bit))
        })
        .collect()
}
27
/// Sign-bit encodes a batch of vectors into one contiguous, fixed-stride buffer.
///
/// Every vector occupies exactly `dim.div_ceil(8)` bytes, so vector `k` starts
/// at byte offset `k * dim.div_ceil(8)`. Within each slot the bit layout is
/// LSB-first: dimension `i` is bit `i % 8` of byte `i / 8`, set when the
/// component is `> 0.0`.
///
/// The stride is enforced even for malformed input: components beyond `dim`
/// are ignored and vectors shorter than `dim` are zero-padded, so one
/// mis-sized vector can never shift the offsets of the vectors after it.
/// With `dim == 0` the result is empty.
pub fn encode_batch(vectors: &[&[f32]], dim: usize) -> Vec<u8> {
    let bytes_per = dim.div_ceil(8);
    if bytes_per == 0 {
        // `chunks_exact_mut(0)` would panic; there is nothing to encode anyway.
        return Vec::new();
    }

    // Pre-zeroed output lets us OR bits in place, with no per-vector temporary.
    let mut out = vec![0u8; bytes_per * vectors.len()];
    for (slot, v) in out.chunks_exact_mut(bytes_per).zip(vectors) {
        for (i, &val) in v.iter().take(dim).enumerate() {
            if val > 0.0 {
                slot[i / 8] |= 1 << (i % 8);
            }
        }
    }
    out
}
39
/// Counts the bits that differ between two packed binary codes.
///
/// Both slices are expected to have the same length; this is checked with a
/// `debug_assert_eq!` only. Without debug assertions, a `b` shorter than `a`
/// panics and any bytes of `b` past `a.len()` are ignored.
#[inline]
pub fn hamming_distance(a: &[u8], b: &[u8]) -> u32 {
    debug_assert_eq!(a.len(), b.len());
    // Re-slice so a too-short `b` still panics instead of being silently
    // truncated by `zip`, and a longer `b` has its extra bytes ignored.
    let rhs = &b[..a.len()];
    a.iter()
        .zip(rhs)
        .map(|(x, y)| (x ^ y).count_ones())
        .sum()
}
53
/// Hamming distance between two packed binary codes, processed a word at a time.
///
/// The input is consumed in 8-byte groups that are XOR-ed and popcounted as
/// `u64` values; the bytes left over after the last full group are handled
/// one by one. The result equals a per-byte XOR/popcount over `a.len()` bytes.
///
/// Both slices are expected to have the same length; this is checked with a
/// `debug_assert_eq!` only. Without debug assertions, a `b` shorter than `a`
/// panics and any bytes of `b` past `a.len()` are ignored.
#[inline]
pub fn hamming_distance_fast(a: &[u8], b: &[u8]) -> u32 {
    /// Loads exactly eight bytes as a little-endian `u64`.
    fn load_word(bytes: &[u8]) -> u64 {
        let mut buf = [0u8; 8];
        buf.copy_from_slice(bytes);
        u64::from_le_bytes(buf)
    }

    debug_assert_eq!(a.len(), b.len());

    // Clamp `b` to `a`'s length: panics if `b` is too short, drops extra bytes
    // otherwise, so both chunk iterators below line up exactly.
    let b = &b[..a.len()];
    let a_words = a.chunks_exact(8);
    let b_words = b.chunks_exact(8);

    // Trailing bytes that do not fill a whole 8-byte word.
    let tail: u32 = a_words
        .remainder()
        .iter()
        .zip(b_words.remainder())
        .map(|(x, y)| (x ^ y).count_ones())
        .sum();

    let body: u32 = a_words
        .zip(b_words)
        .map(|(x, y)| (load_word(x) ^ load_word(y)).count_ones())
        .sum();

    body + tail
}
83
/// Number of bytes needed to hold a `dim`-bit binary code, i.e. `dim / 8`
/// rounded up.
pub fn binary_size(dim: usize) -> usize {
    let full_bytes = dim / 8;
    // One extra byte when the dimension does not fill the last byte completely.
    let partial_byte = usize::from(dim % 8 != 0);
    full_bytes + partial_byte
}
88
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `dim`-lane vector that is `1.0` on every `period`-th lane
    /// (starting at lane 0) and `-1.0` everywhere else.
    fn periodic(dim: usize, period: usize) -> Vec<f32> {
        (0..dim)
            .map(|i| if i % period == 0 { 1.0 } else { -1.0 })
            .collect()
    }

    #[test]
    fn encode_positive_negative() {
        let v = [1.0f32, -1.0, 1.0, -1.0, 0.0, 1.0, -0.5, 0.5];
        let bits = encode(&v);
        assert_eq!(bits.len(), 1);
        // Lanes 0, 2, 5 and 7 are strictly positive; lane 0 is the LSB.
        assert_eq!(bits[0], 0b10100101);
    }

    #[test]
    fn hamming_identical_is_zero() {
        let v = [1.0f32, -1.0, 1.0, 0.5];
        let a = encode(&v);
        let b = encode(&v);
        assert_eq!(hamming_distance(&a, &b), 0);
    }

    #[test]
    fn hamming_opposite_is_dim() {
        let a = encode(&[1.0f32; 8]);
        let b = encode(&[-1.0f32; 8]);
        assert_eq!(hamming_distance(&a, &b), 8);
    }

    #[test]
    fn hamming_fast_matches_simple() {
        // 128 dimensions -> 16 bytes: exercises only the whole-word path.
        let a = encode(&periodic(128, 3));
        let b = encode(&periodic(128, 5));

        let slow = hamming_distance(&a, &b);
        let fast = hamming_distance_fast(&a, &b);
        assert_eq!(slow, fast);
    }

    #[test]
    fn hamming_fast_matches_simple_with_tail() {
        // Code lengths that are not a multiple of 8 bytes, so the trailing
        // per-byte loop of `hamming_distance_fast` is exercised as well:
        // 1 and 24 dims are tail-only, 100 and 136 dims are words plus tail.
        for &dim in &[1usize, 24, 100, 136] {
            let a = encode(&periodic(dim, 3));
            let b = encode(&periodic(dim, 5));
            assert_eq!(
                hamming_distance(&a, &b),
                hamming_distance_fast(&a, &b),
                "mismatch at dim {}",
                dim
            );
        }
    }

    #[test]
    fn high_dimensional_encoding() {
        let v: Vec<f32> = (0..768).map(|i| (i as f32).sin()).collect();
        let bits = encode(&v);
        // 768 dimensions pack into 768 / 8 bytes.
        assert_eq!(bits.len(), 96);
    }

    #[test]
    fn batch_encode_layout() {
        let v1 = [1.0f32, -1.0, 1.0, -1.0];
        let v2 = [-1.0f32, 1.0, -1.0, 1.0];
        let batch = encode_batch(&[&v1, &v2], 4);
        // One byte per 4-dimensional vector, stored back to back.
        assert_eq!(batch.len(), 2);
        assert_eq!(batch[0], encode(&v1)[0]);
        assert_eq!(batch[1], encode(&v2)[0]);
    }
}