manifoldb_vector/quantization/
config.rs1use crate::distance::DistanceMetric;
4use crate::error::VectorError;
5
6#[derive(Debug, Clone)]
24pub struct PQConfig {
25 pub dimension: usize,
27 pub num_segments: usize,
29 pub num_centroids: usize,
31 pub distance_metric: DistanceMetric,
33 pub training_iterations: usize,
35 pub seed: Option<u64>,
37}
38
39impl PQConfig {
40 #[must_use]
58 pub fn new(dimension: usize, num_segments: usize) -> Self {
59 assert!(num_segments > 0, "num_segments must be > 0");
60 assert!(
61 dimension % num_segments == 0,
62 "dimension ({}) must be divisible by num_segments ({})",
63 dimension,
64 num_segments
65 );
66
67 Self {
68 dimension,
69 num_segments,
70 num_centroids: 256,
71 distance_metric: DistanceMetric::Euclidean,
72 training_iterations: 25,
73 seed: None,
74 }
75 }
76
77 #[must_use]
83 pub const fn with_num_centroids(mut self, k: usize) -> Self {
84 self.num_centroids = k;
85 self
86 }
87
88 #[must_use]
90 pub const fn with_distance_metric(mut self, metric: DistanceMetric) -> Self {
91 self.distance_metric = metric;
92 self
93 }
94
95 #[must_use]
97 pub const fn with_training_iterations(mut self, iterations: usize) -> Self {
98 self.training_iterations = iterations;
99 self
100 }
101
102 #[must_use]
104 pub const fn with_seed(mut self, seed: u64) -> Self {
105 self.seed = Some(seed);
106 self
107 }
108
109 #[must_use]
111 pub const fn subspace_dimension(&self) -> usize {
112 self.dimension / self.num_segments
113 }
114
115 #[must_use]
119 #[allow(clippy::cast_possible_truncation)]
120 #[allow(clippy::cast_sign_loss)]
121 pub fn bits_per_code(&self) -> usize {
122 if self.num_centroids <= 1 {
124 1
125 } else {
126 (self.num_centroids as f64).log2().ceil() as usize
127 }
128 }
129
130 #[must_use]
132 pub fn bytes_per_code(&self) -> usize {
133 let bits = self.bits_per_code();
136 let total_bits = bits * self.num_segments;
137 total_bits.div_ceil(8)
138 }
139
140 pub fn validate(&self) -> Result<(), VectorError> {
149 if self.dimension == 0 {
150 return Err(VectorError::InvalidDimension { expected: 1, actual: 0 });
151 }
152
153 if self.num_segments == 0 {
154 return Err(VectorError::Encoding("num_segments must be > 0".to_string()));
155 }
156
157 if self.dimension % self.num_segments != 0 {
158 return Err(VectorError::Encoding(format!(
159 "dimension ({}) must be divisible by num_segments ({})",
160 self.dimension, self.num_segments
161 )));
162 }
163
164 if self.num_centroids == 0 {
165 return Err(VectorError::Encoding("num_centroids must be > 0".to_string()));
166 }
167
168 Ok(())
169 }
170
171 #[must_use]
173 pub fn compression_ratio(&self) -> f32 {
174 let original_bytes = self.dimension * 4; let compressed_bytes = self.bytes_per_code();
176 original_bytes as f32 / compressed_bytes as f32
177 }
178}
179
// Unit tests for `PQConfig`: defaults, code-size arithmetic, validation and
// the panicking preconditions of `PQConfig::new`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_config() {
        let config = PQConfig::new(128, 8);
        assert_eq!(config.dimension, 128);
        assert_eq!(config.num_segments, 8);
        assert_eq!(config.num_centroids, 256);
        assert_eq!(config.training_iterations, 25);
        assert_eq!(config.seed, None);
        assert_eq!(config.subspace_dimension(), 16);
    }

    #[test]
    fn test_builder_setters() {
        let config = PQConfig::new(128, 8)
            .with_num_centroids(16)
            .with_training_iterations(3)
            .with_seed(42);
        assert_eq!(config.num_centroids, 16);
        assert_eq!(config.training_iterations, 3);
        assert_eq!(config.seed, Some(42));
    }

    #[test]
    fn test_bits_per_code() {
        // (num_centroids, expected bits): powers of two, values in between,
        // and the degenerate counts that are clamped to one bit.
        let cases: [(usize, usize); 8] =
            [(0, 1), (1, 1), (2, 1), (16, 4), (100, 7), (256, 8), (257, 9), (65536, 16)];
        for (centroids, expected) in cases {
            let config = PQConfig::new(128, 8).with_num_centroids(centroids);
            assert_eq!(config.bits_per_code(), expected, "num_centroids = {}", centroids);
        }
    }

    #[test]
    fn test_bytes_per_code() {
        // 8 segments * 8 bits = 64 bits = 8 bytes
        let config = PQConfig::new(128, 8).with_num_centroids(256);
        assert_eq!(config.bytes_per_code(), 8);

        // 8 segments * 16 bits = 128 bits = 16 bytes
        let config = PQConfig::new(128, 8).with_num_centroids(65536);
        assert_eq!(config.bytes_per_code(), 16);

        // 3 segments * 3 bits = 9 bits, rounded up to 2 bytes
        let config = PQConfig::new(9, 3).with_num_centroids(8);
        assert_eq!(config.bytes_per_code(), 2);
    }

    #[test]
    fn test_compression_ratio() {
        // 128 components * 4 bytes = 512 bytes, compressed to 8 bytes
        let config = PQConfig::new(128, 8).with_num_centroids(256);
        assert!((config.compression_ratio() - 64.0).abs() < 0.01);
    }

    #[test]
    fn test_validation() {
        let config = PQConfig::new(128, 8);
        assert!(config.validate().is_ok());

        let mut config = PQConfig::new(128, 8);
        config.num_centroids = 0;
        assert!(config.validate().is_err());
    }

    #[test]
    fn test_validation_rejects_bad_shape() {
        // The fields are public, so `validate` has to catch states that
        // `PQConfig::new` would have refused to build.
        let mut config = PQConfig::new(128, 8);
        config.dimension = 0;
        assert!(config.validate().is_err());

        let mut config = PQConfig::new(128, 8);
        config.num_segments = 0;
        assert!(config.validate().is_err());

        let mut config = PQConfig::new(128, 8);
        config.num_segments = 7;
        assert!(config.validate().is_err());
    }

    #[test]
    #[should_panic(expected = "num_segments must be > 0")]
    fn test_zero_segments_panics() {
        let _ = PQConfig::new(128, 0);
    }

    #[test]
    #[should_panic(expected = "must be divisible by")]
    fn test_indivisible_dimension_panics() {
        let _ = PQConfig::new(128, 7);
    }
}