Skip to main content

diskann_disk/build/chunking/continuation/
chunking_config.rs

1/*
2 * Copyright (c) Microsoft Corporation.
3 * Licensed under the MIT license.
4 */
5
6use core::fmt;
7
8use super::continuation_tracker::{ContinuationTrackerTrait, NaiveContinuationTracker};
9
/// Default for `ChunkingConfig::inmemory_build_chunk_vector_count`, in vectors.
const PQ_DEFAULT_BATCH_SIZE: usize = 5_000_000;
/// Default for `ChunkingConfig::data_compression_chunk_vector_count`, in vectors.
const PQ_COMPRESSION_DEFAULT_CHUNK_SIZE: usize = 25_000;
12
13/// Configuraton used for chunked index build.
14pub struct ChunkingConfig {
15    // Continuation grant provider to be used for getting continuation grants during chunk intervals.
16    pub continuation_checker: Box<dyn ContinuationTrackerTrait>,
17
18    // The size of each chunk in terms of number of vectors during pq compression.
19    pub data_compression_chunk_vector_count: usize,
20
21    // The size of each chunk in terms of number of vectors during in-memory index build.
22    pub inmemory_build_chunk_vector_count: usize,
23}
24
25impl Default for ChunkingConfig {
26    // Default ChunkingConfig that tries to build the entire index in one go.
27    fn default() -> Self {
28        ChunkingConfig {
29            continuation_checker: Box::<NaiveContinuationTracker>::default(),
30            data_compression_chunk_vector_count: PQ_COMPRESSION_DEFAULT_CHUNK_SIZE,
31            inmemory_build_chunk_vector_count: PQ_DEFAULT_BATCH_SIZE,
32        }
33    }
34}
35
36impl Clone for ChunkingConfig {
37    fn clone(&self) -> Self {
38        ChunkingConfig {
39            continuation_checker: self.continuation_checker.clone_box(),
40            data_compression_chunk_vector_count: self.data_compression_chunk_vector_count,
41            inmemory_build_chunk_vector_count: self.inmemory_build_chunk_vector_count,
42        }
43    }
44}
45
46impl fmt::Display for ChunkingConfig {
47    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48        write!(
49            f,
50            "ChunkingConfig: data_compression_chunk_vector_count: {}, inmemory_build_chunk_vector_count: {} }}",
51            self.data_compression_chunk_vector_count, self.inmemory_build_chunk_vector_count
52        )
53    }
54}
55
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a config with a default `NaiveContinuationTracker` and the given chunk sizes.
    fn config_with(compression_chunk: usize, inmemory_chunk: usize) -> ChunkingConfig {
        ChunkingConfig {
            continuation_checker: Box::<NaiveContinuationTracker>::default(),
            data_compression_chunk_vector_count: compression_chunk,
            inmemory_build_chunk_vector_count: inmemory_chunk,
        }
    }

    /// Cloning must carry both chunk sizes over unchanged.
    #[test]
    fn test_chunking_config_clone() {
        let original = config_with(1000, 5000);
        let copy = original.clone();

        assert_eq!(
            original.data_compression_chunk_vector_count,
            copy.data_compression_chunk_vector_count
        );
        assert_eq!(
            original.inmemory_build_chunk_vector_count,
            copy.inmemory_build_chunk_vector_count
        );
    }

    /// The rendered text must mention the type name, both field names and both values.
    #[test]
    fn test_chunking_config_display() {
        let rendered = config_with(1234, 5678).to_string();

        let expected_fragments = [
            "ChunkingConfig",
            "1234",
            "5678",
            "data_compression_chunk_vector_count",
            "inmemory_build_chunk_vector_count",
        ];
        for fragment in expected_fragments {
            assert!(rendered.contains(fragment));
        }
    }

    /// `Default` must pick up the module-level default chunk sizes.
    #[test]
    fn test_chunking_config_default() {
        let defaults = ChunkingConfig::default();

        assert_eq!(
            defaults.data_compression_chunk_vector_count,
            PQ_COMPRESSION_DEFAULT_CHUNK_SIZE
        );
        assert_eq!(
            defaults.inmemory_build_chunk_vector_count,
            PQ_DEFAULT_BATCH_SIZE
        );
    }
}