lance_index/
optimize.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
/// Options for optimizing all indices.
///
/// The type is plain data (`usize`, `Option<Vec<String>>`, `bool`), so it derives
/// `Clone`, `PartialEq` and `Eq` in addition to `Debug`; callers can keep a
/// template value around, copy it, and compare configurations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OptimizeOptions {
    /// Number of delta indices to merge for one column. Default: 1.
    ///
    /// - If `num_indices_to_merge` is 0, a new delta index will be created.
    /// - If `num_indices_to_merge` is 1, the delta updates will be merged into the latest index.
    /// - If `num_indices_to_merge` is more than 1, the delta updates and the latest N indices
    ///   will be merged into one single index.
    ///
    /// It is up to the caller to decide how many indices to merge / keep. Callers can
    /// find out how many indices there are by calling [`Dataset::index_statistics`].
    ///
    /// A common usage pattern is for the caller to keep a large snapshot of the index of
    /// the base version, accumulate a few delta indices, and then merge them into the snapshot.
    pub num_indices_to_merge: usize,

    /// The names of the indices to optimize. If `None`, all indices will be optimized.
    pub index_names: Option<Vec<String>>,

    /// Whether to retrain the whole index. Default: false.
    ///
    /// If true, the index will be retrained based on the current data,
    /// `num_indices_to_merge` will be ignored, and all indices will be merged into one.
    /// If false, the index will be optimized by merging `num_indices_to_merge` indices.
    ///
    /// This is useful when the data distribution has changed significantly
    /// and we want to retrain the index to improve the search quality.
    /// This would be faster than re-creating the index from scratch.
    ///
    /// NOTE: this option is only supported for v3 vector indices.
    pub retrain: bool,
}
37
38impl Default for OptimizeOptions {
39    fn default() -> Self {
40        Self {
41            num_indices_to_merge: 1,
42            index_names: None,
43            retrain: false,
44        }
45    }
46}
47
48impl OptimizeOptions {
49    pub fn new() -> Self {
50        Self {
51            num_indices_to_merge: 1,
52            index_names: None,
53            retrain: false,
54        }
55    }
56
57    pub fn append() -> Self {
58        Self {
59            num_indices_to_merge: 0,
60            index_names: None,
61            retrain: false,
62        }
63    }
64
65    pub fn retrain() -> Self {
66        Self {
67            num_indices_to_merge: 0,
68            index_names: None,
69            retrain: true,
70        }
71    }
72
73    pub fn num_indices_to_merge(mut self, num: usize) -> Self {
74        self.num_indices_to_merge = num;
75        self
76    }
77
78    pub fn index_names(mut self, names: Vec<String>) -> Self {
79        self.index_names = Some(names);
80        self
81    }
82}