Skip to main content

diskann_disk/build/configuration/
disk_index_build_parameter.rs

1/*
2 * Copyright (c) Microsoft Corporation.
3 * Licensed under the MIT license.
4 */
5#![warn(missing_debug_implementations, missing_docs)]
6
7//! Parameters for disk index construction.
8use std::num::NonZeroUsize;
9
10use diskann::ANNError;
11use thiserror::Error;
12
13use super::QuantizationType;
14
/// Number of bytes in one gibibyte (2^30 = 1,073,741,824), expressed as an `f64`.
///
/// Despite the `GB` in the name, this is the binary (1024-based) ratio.
pub const BYTES_IN_GB: f64 = (1_u64 << 30) as f64;
17
/// Size of one disk sector, in bytes (4 KiB).
///
/// Index data is read from and written to disk in blocks no smaller than this,
/// and offsets are aligned to it.
pub const DISK_SECTOR_LEN: usize = 4 * 1024;
21
22/// Errors returned when validating PQ chunk parameters.
23#[derive(Debug, Error, PartialEq)]
24#[error("Budget must be greater than zero")]
25pub struct InvalidMemBudget;
26
27impl From<InvalidMemBudget> for ANNError {
28    fn from(value: InvalidMemBudget) -> Self {
29        ANNError::log_index_config_error("MemoryBudget".to_string(), format!("{value:?}"))
30    }
31}
32
/// Upper bound on the memory that may be used while building the disk index.
///
/// The byte count is held as a [`NonZeroUsize`], so a zero budget cannot be
/// represented.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct MemoryBudget {
    /// Budget size in bytes; never zero.
    bytes: NonZeroUsize,
}
38
39impl MemoryBudget {
40    /// Create a memory budget from gibibytes.
41    pub fn try_from_gb(gib: f64) -> Result<Self, InvalidMemBudget> {
42        let bytes_f = (gib * BYTES_IN_GB).round() as usize;
43        let bytes = NonZeroUsize::new(bytes_f).ok_or(InvalidMemBudget)?;
44
45        Ok(Self { bytes })
46    }
47
48    /// Returns the budget in bytes.
49    pub fn in_bytes(self) -> usize {
50        self.bytes.get()
51    }
52}
53
54/// Errors returned when validating PQ chunk parameters.
55#[derive(Debug, Error, PartialEq)]
56pub enum PQChunksError {
57    /// Provided dimension was zero.
58    #[error("Dimension must be greater than zero")]
59    DimensionIsZero,
60    /// Requested PQ chunk count falls outside the valid range for the dimension.
61    #[error("Number of PQ chunks must be within [1, {dim}], received {num_chunks}")]
62    OutOfRange {
63        /// Requested PQ chunk count.
64        num_chunks: usize,
65        /// Dimension used to validate the chunk count.
66        dim: usize,
67    },
68}
69
70impl From<PQChunksError> for ANNError {
71    fn from(value: PQChunksError) -> Self {
72        ANNError::log_index_config_error("NumPQChunks".to_string(), format!("{value:?}"))
73    }
74}
75
/// PQ chunk count for disk index construction, wrapped after validation.
///
/// The count is stored as a [`NonZeroUsize`], so zero chunks cannot be represented.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct NumPQChunks(NonZeroUsize);
79
80impl NumPQChunks {
81    /// Create a validated PQ chunk count.
82    pub fn new_with(num_chunks: usize, dim: usize) -> Result<Self, PQChunksError> {
83        if dim == 0 {
84            return Err(PQChunksError::DimensionIsZero);
85        }
86
87        let num_chunks = NonZeroUsize::new(num_chunks).ok_or(PQChunksError::DimensionIsZero)?;
88
89        if num_chunks.get() > dim {
90            return Err(PQChunksError::OutOfRange {
91                dim,
92                num_chunks: num_chunks.get(),
93            });
94        }
95
96        Ok(Self(num_chunks))
97    }
98
99    /// Get the raw chunk count.
100    pub fn get(self) -> usize {
101        self.0.into()
102    }
103}
104
105/// Parameters specific for disk index construction.
106#[derive(Clone, Copy, PartialEq, Debug)]
107pub struct DiskIndexBuildParameters {
108    /// Limit on the memory allowed for building the index.
109    build_memory_limit: MemoryBudget,
110
111    /// Number of PQ chunks stored in-memory for search and to be generated during build.
112    search_pq_chunks: NumPQChunks,
113
114    /// QuantizationType used to instantiate quantized DataProvider for DiskANN Index during build.
115    build_quantization: QuantizationType,
116}
117
118impl DiskIndexBuildParameters {
119    /// Create new build parameters from already validated components.
120    pub fn new(
121        build_memory_limit: MemoryBudget,
122        build_quantization: QuantizationType,
123        search_pq_chunks: NumPQChunks,
124    ) -> Self {
125        Self {
126            build_memory_limit,
127            search_pq_chunks,
128            build_quantization,
129        }
130    }
131
132    /// Get the configured memory budget for index building.
133    pub fn build_memory_limit(&self) -> MemoryBudget {
134        self.build_memory_limit
135    }
136
137    /// Get quantization type used to instantiate quantized DataProvider for DiskANN Index during build
138    pub fn build_quantization(&self) -> &QuantizationType {
139        &self.build_quantization
140    }
141
142    /// Get user specified PQ chunks count for in-memory search data.
143    pub fn search_pq_chunks(&self) -> NumPQChunks {
144        self.search_pq_chunks
145    }
146}
147
#[cfg(test)]
mod dataset_test {
    use diskann::{ANNError, ANNErrorKind};

    use super::*;

    /// A whole-gibibyte budget converts to the exact byte count; zero is rejected.
    #[test]
    fn memory_budget_converts_units() {
        let two_gib = MemoryBudget::try_from_gb(2.0).unwrap();
        let expected_bytes = 2.0 * BYTES_IN_GB;
        assert_eq!(two_gib.in_bytes() as f64, expected_bytes);

        let zero_budget = MemoryBudget::try_from_gb(0.0);
        assert!(zero_budget.is_err());
    }

    /// The chunk count handed to the constructor is the one the getter returns.
    #[test]
    fn build_with_num_of_pq_chunks_should_work() {
        let chunks = NumPQChunks::new_with(20, 128).unwrap();
        let budget = MemoryBudget::try_from_gb(2.0).unwrap();

        let params = DiskIndexBuildParameters::new(budget, QuantizationType::default(), chunks);

        assert_eq!(params.search_pq_chunks().get(), chunks.get());
    }

    /// Valid components build successfully; non-positive budgets are rejected and
    /// convert into an index-configuration error.
    #[test]
    fn disk_index_build_parameters_try_new_handles_invalid() {
        // Valid components.
        let budget = MemoryBudget::try_from_gb(1.0).unwrap();
        let chunks = NumPQChunks::new_with(1, 128).unwrap();
        let params = DiskIndexBuildParameters::new(budget, QuantizationType::default(), chunks);

        let configured_bytes = params.build_memory_limit().in_bytes() as f64;
        assert_eq!(configured_bytes, 1.0 * BYTES_IN_GB);

        // Invalid budgets.
        assert!(MemoryBudget::try_from_gb(0.0).is_err());

        let err: ANNError = MemoryBudget::try_from_gb(-1.0).unwrap_err().into();
        assert_eq!(err.kind(), ANNErrorKind::IndexConfigError);
    }

    /// Zero chunks, more chunks than dimensions, and a zero dimension all fail.
    #[test]
    fn num_pq_chunks_new_rejects_invalid_values() {
        for (num_chunks, dim) in [(0, 128), (129, 128), (1, 0)] {
            assert!(NumPQChunks::new_with(num_chunks, dim).is_err());
        }
    }

    /// A chunk count inside `[1, dim]` is accepted and returned unchanged.
    #[test]
    fn num_pq_chunks_new_accepts_valid_values() {
        let accepted = NumPQChunks::new_with(64, 128).unwrap();
        assert_eq!(accepted.get(), 64);
    }
}