lance_index/optimize.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
/// Options for optimizing all indices.
///
/// The type is `Clone` and comparable so a configuration can be reused across
/// several optimize calls and checked in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OptimizeOptions {
    /// Number of delta indices to merge for one column. Default: 1.
    ///
    /// - If `num_indices_to_merge` is 0, a new delta index will be created.
    /// - If `num_indices_to_merge` is 1, the delta updates will be merged into the latest index.
    /// - If `num_indices_to_merge` is more than 1, the delta updates and the latest N indices
    ///   will be merged into one single index.
    ///
    /// It is up to the caller to decide how many indices to merge / keep. Callers can
    /// find out how many indices there are by calling [`Dataset::index_statistics`].
    ///
    /// A common usage pattern is to keep a large snapshot of the index of the base version,
    /// accumulate a few delta indices, and then merge them into the snapshot.
    pub num_indices_to_merge: usize,

    /// The names of the indices to optimize. Default: `None`.
    ///
    /// If `None`, all indices will be optimized.
    pub index_names: Option<Vec<String>>,

    /// Whether to retrain the whole index. Default: `false`.
    ///
    /// If `true`, the index will be retrained based on the current data,
    /// `num_indices_to_merge` will be ignored, and all indices will be merged into one.
    /// If `false`, the index will be optimized by merging `num_indices_to_merge` indices.
    ///
    /// This is useful when the data distribution has changed significantly
    /// and the index should be retrained to improve the search quality.
    /// This is faster than re-creating the index from scratch.
    ///
    /// NOTE: this option is only supported for v3 vector indices.
    pub retrain: bool,
}
37
38impl Default for OptimizeOptions {
39 fn default() -> Self {
40 Self {
41 num_indices_to_merge: 1,
42 index_names: None,
43 retrain: false,
44 }
45 }
46}
47
48impl OptimizeOptions {
49 pub fn new() -> Self {
50 Self {
51 num_indices_to_merge: 1,
52 index_names: None,
53 retrain: false,
54 }
55 }
56
57 pub fn append() -> Self {
58 Self {
59 num_indices_to_merge: 0,
60 index_names: None,
61 retrain: false,
62 }
63 }
64
65 pub fn retrain() -> Self {
66 Self {
67 num_indices_to_merge: 0,
68 index_names: None,
69 retrain: true,
70 }
71 }
72
73 pub fn num_indices_to_merge(mut self, num: usize) -> Self {
74 self.num_indices_to_merge = num;
75 self
76 }
77
78 pub fn index_names(mut self, names: Vec<String>) -> Self {
79 self.index_names = Some(names);
80 self
81 }
82}