diskann_disk/build/configuration/
disk_index_build_parameter.rs1#![warn(missing_debug_implementations, missing_docs)]
6
7use std::num::NonZeroUsize;
9
10use diskann::ANNError;
11use thiserror::Error;
12
13use super::QuantizationType;
14
/// Number of bytes in one gibibyte (1024 * 1024 * 1024), kept as `f64` so it can
/// scale fractional sizes directly.
pub const BYTES_IN_GB: f64 = 1_073_741_824_f64;

/// Disk sector length used by the disk index (presumably in bytes — confirm
/// against the on-disk layout code).
pub const DISK_SECTOR_LEN: usize = 4_096;
21
22#[derive(Debug, Error, PartialEq)]
24#[error("Budget must be greater than zero")]
25pub struct InvalidMemBudget;
26
27impl From<InvalidMemBudget> for ANNError {
28 fn from(value: InvalidMemBudget) -> Self {
29 ANNError::log_index_config_error("MemoryBudget".to_string(), format!("{value:?}"))
30 }
31}
32
/// A memory budget stored as a strictly positive number of bytes.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct MemoryBudget {
    /// Budget size in bytes; the `NonZeroUsize` type rules out a zero budget.
    bytes: NonZeroUsize,
}
38
39impl MemoryBudget {
40 pub fn try_from_gb(gib: f64) -> Result<Self, InvalidMemBudget> {
42 let bytes_f = (gib * BYTES_IN_GB).round() as usize;
43 let bytes = NonZeroUsize::new(bytes_f).ok_or(InvalidMemBudget)?;
44
45 Ok(Self { bytes })
46 }
47
48 pub fn in_bytes(self) -> usize {
50 self.bytes.get()
51 }
52}
53
54#[derive(Debug, Error, PartialEq)]
56pub enum PQChunksError {
57 #[error("Dimension must be greater than zero")]
59 DimensionIsZero,
60 #[error("Number of PQ chunks must be within [1, {dim}], received {num_chunks}")]
62 OutOfRange {
63 num_chunks: usize,
65 dim: usize,
67 },
68}
69
70impl From<PQChunksError> for ANNError {
71 fn from(value: PQChunksError) -> Self {
72 ANNError::log_index_config_error("NumPQChunks".to_string(), format!("{value:?}"))
73 }
74}
75
/// A strictly positive number of PQ chunks.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct NumPQChunks(NonZeroUsize);
79
80impl NumPQChunks {
81 pub fn new_with(num_chunks: usize, dim: usize) -> Result<Self, PQChunksError> {
83 if dim == 0 {
84 return Err(PQChunksError::DimensionIsZero);
85 }
86
87 let num_chunks = NonZeroUsize::new(num_chunks).ok_or(PQChunksError::DimensionIsZero)?;
88
89 if num_chunks.get() > dim {
90 return Err(PQChunksError::OutOfRange {
91 dim,
92 num_chunks: num_chunks.get(),
93 });
94 }
95
96 Ok(Self(num_chunks))
97 }
98
99 pub fn get(self) -> usize {
101 self.0.into()
102 }
103}
104
105#[derive(Clone, Copy, PartialEq, Debug)]
107pub struct DiskIndexBuildParameters {
108 build_memory_limit: MemoryBudget,
110
111 search_pq_chunks: NumPQChunks,
113
114 build_quantization: QuantizationType,
116}
117
118impl DiskIndexBuildParameters {
119 pub fn new(
121 build_memory_limit: MemoryBudget,
122 build_quantization: QuantizationType,
123 search_pq_chunks: NumPQChunks,
124 ) -> Self {
125 Self {
126 build_memory_limit,
127 search_pq_chunks,
128 build_quantization,
129 }
130 }
131
132 pub fn build_memory_limit(&self) -> MemoryBudget {
134 self.build_memory_limit
135 }
136
137 pub fn build_quantization(&self) -> &QuantizationType {
139 &self.build_quantization
140 }
141
142 pub fn search_pq_chunks(&self) -> NumPQChunks {
144 self.search_pq_chunks
145 }
146}
147
#[cfg(test)]
mod dataset_test {
    use diskann::{ANNError, ANNErrorKind};

    use super::*;

    /// A 2 GiB budget reports the matching byte count and a zero budget is rejected.
    #[test]
    fn memory_budget_converts_units() {
        let two_gib = MemoryBudget::try_from_gb(2.0).unwrap();
        let reported = two_gib.in_bytes() as f64;
        assert_eq!(reported, 2.0 * BYTES_IN_GB);

        let zero = MemoryBudget::try_from_gb(0.0);
        assert!(zero.is_err());
    }

    /// The chunk count handed to the constructor is returned by the accessor.
    #[test]
    fn build_with_num_of_pq_chunks_should_work() {
        let chunks = NumPQChunks::new_with(20, 128).unwrap();
        let budget = MemoryBudget::try_from_gb(2.0).unwrap();

        let params = DiskIndexBuildParameters::new(budget, QuantizationType::default(), chunks);

        assert_eq!(params.search_pq_chunks().get(), chunks.get());
    }

    /// Valid inputs round-trip; zero and negative budgets fail, and the failure
    /// converts into an index-configuration `ANNError`.
    #[test]
    fn disk_index_build_parameters_try_new_handles_invalid() {
        let budget = MemoryBudget::try_from_gb(1.0).unwrap();
        let chunks = NumPQChunks::new_with(1, 128).unwrap();
        let params = DiskIndexBuildParameters::new(budget, QuantizationType::default(), chunks);

        let limit_bytes = params.build_memory_limit().in_bytes();
        assert_eq!(limit_bytes as f64, 1.0 * BYTES_IN_GB);

        assert!(MemoryBudget::try_from_gb(0.0).is_err());

        let negative = MemoryBudget::try_from_gb(-1.0).unwrap_err();
        let err = ANNError::from(negative);
        assert_eq!(err.kind(), ANNErrorKind::IndexConfigError);
    }

    /// Zero chunks, more chunks than dimensions, and a zero dimension all fail.
    #[test]
    fn num_pq_chunks_new_rejects_invalid_values() {
        let invalid: [(usize, usize); 3] = [(0, 128), (129, 128), (1, 0)];

        for &(num_chunks, dim) in invalid.iter() {
            assert!(NumPQChunks::new_with(num_chunks, dim).is_err());
        }
    }

    /// A chunk count inside `[1, dim]` is accepted and returned unchanged.
    #[test]
    fn num_pq_chunks_new_accepts_valid_values() {
        let accepted = NumPQChunks::new_with(64, 128).unwrap();
        assert_eq!(accepted.get(), 64);
    }
}