diskann_disk/build/chunking/continuation/
chunking_config.rs

1/*
2 * Copyright (c) Microsoft Corporation.
3 * Licensed under the MIT license.
4 */
5
6use core::fmt;
7
8use super::continuation_tracker::{ContinuationTrackerTrait, NaiveContinuationTracker};
9
/// Default number of vectors per chunk for the in-memory index build
/// (used as the default for `inmemory_build_chunk_vector_count`).
const PQ_DEFAULT_BATCH_SIZE: usize = 5_000_000;

/// Default number of vectors per chunk during PQ compression
/// (used as the default for `data_compression_chunk_vector_count`).
const PQ_COMPRESSION_DEFAULT_CHUNK_SIZE: usize = 25_000;
12
13/// Configuraton used for chunked index build.
14pub struct ChunkingConfig {
15    // Continuation grant provider to be used for getting continuation grants during chunk intervals.
16    pub continuation_checker: Box<dyn ContinuationTrackerTrait>,
17
18    // The size of each chunk in terms of number of vectors during pq compression.
19    pub data_compression_chunk_vector_count: usize,
20
21    // The size of each chunk in terms of number of vectors during in-memory index build.
22    pub inmemory_build_chunk_vector_count: usize,
23}
24
25impl Default for ChunkingConfig {
26    // Default ChunkingConfig that tries to build the entire index in one go.
27    fn default() -> Self {
28        ChunkingConfig {
29            continuation_checker: Box::<NaiveContinuationTracker>::default(),
30            data_compression_chunk_vector_count: PQ_COMPRESSION_DEFAULT_CHUNK_SIZE,
31            inmemory_build_chunk_vector_count: PQ_DEFAULT_BATCH_SIZE,
32        }
33    }
34}
35
36impl Clone for ChunkingConfig {
37    fn clone(&self) -> Self {
38        ChunkingConfig {
39            continuation_checker: self.continuation_checker.clone_box(),
40            data_compression_chunk_vector_count: self.data_compression_chunk_vector_count,
41            inmemory_build_chunk_vector_count: self.inmemory_build_chunk_vector_count,
42        }
43    }
44}
45
46impl fmt::Display for ChunkingConfig {
47    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48        write!(
49            f,
50            "ChunkingConfig: data_compression_chunk_vector_count: {}, inmemory_build_chunk_vector_count: {} }}",
51            self.data_compression_chunk_vector_count, self.inmemory_build_chunk_vector_count
52        )
53    }
54}