lance_index/
optimize.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
/// Settings that control how the indices of a dataset are optimized.
#[derive(Debug)]
pub struct OptimizeOptions {
    /// How many existing indices of one column take part in the merge. Default: 1.
    ///
    /// * `0` — nothing is merged; a new delta index is created.
    /// * `1` — the delta updates are merged into the latest index.
    /// * more than `1` — the delta updates and the latest N indices are merged
    ///   into one single index.
    ///
    /// The caller decides how many indices to merge or keep. The number of indices
    /// that currently exist can be found by calling [`Dataset::index_statistics`].
    ///
    /// A common usage pattern is to keep one large snapshot of the index for the
    /// base version, accumulate a few delta indices, and then merge them into the
    /// snapshot.
    pub num_indices_to_merge: usize,

    /// Names of the indices to optimize. If `None`, all indices are optimized.
    pub index_names: Option<Vec<String>>,

    /// Whether to retrain the whole index. Default: false.
    ///
    /// Deprecated since 0.35.0: lance>=0.35.0 always updates the index
    /// incrementally and ignores this option.
    ///
    /// Documented behavior before the deprecation:
    ///
    /// * `true` — the index is retrained on the current data,
    ///   `num_indices_to_merge` is ignored, and all indices are merged into one.
    /// * `false` — the index is optimized by merging `num_indices_to_merge` indices.
    ///
    /// Retraining was meant for cases where the data distribution has changed
    /// significantly and search quality should be restored; it was described as
    /// faster than re-creating the index from scratch.
    ///
    /// NOTE: this option was only supported for v3 vector indices.
    #[deprecated(
        since = "0.35.0",
        note = "lance>=0.35.0 always incrementally updates the index, this option is ignored"
    )]
    pub retrain: bool,
}
41
42impl Default for OptimizeOptions {
43    fn default() -> Self {
44        Self {
45            num_indices_to_merge: 1,
46            index_names: None,
47            #[allow(deprecated)]
48            retrain: false,
49        }
50    }
51}
52
53impl OptimizeOptions {
54    pub fn new() -> Self {
55        Self {
56            num_indices_to_merge: 1,
57            index_names: None,
58            ..Default::default()
59        }
60    }
61
62    pub fn append() -> Self {
63        Self {
64            num_indices_to_merge: 0,
65            index_names: None,
66            ..Default::default()
67        }
68    }
69
70    #[deprecated(
71        since = "0.35.0",
72        note = "lance>=0.35.0 always incrementally updates the index, this option is ignored"
73    )]
74    pub fn retrain() -> Self {
75        Self {
76            num_indices_to_merge: 0,
77            index_names: None,
78            ..Default::default()
79        }
80    }
81
82    pub fn num_indices_to_merge(mut self, num: usize) -> Self {
83        self.num_indices_to_merge = num;
84        self
85    }
86
87    pub fn index_names(mut self, names: Vec<String>) -> Self {
88        self.index_names = Some(names);
89        self
90    }
91}