Skip to main content

lance_index/
optimize.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::collections::HashMap;
5use std::sync::Arc;
6
/// Options for optimizing all indices.
///
/// `Default` is derived, so a default-constructed value has every `Option`
/// field set to `None` and `retrain` set to `false`.
#[non_exhaustive]
#[derive(Debug, Clone, Default)]
pub struct OptimizeOptions {
    /// Number of delta indices to merge for one column. Default: `None`.
    ///
    /// If `num_indices_to_merge` is `None`, lance will create a new delta index if no
    /// partition is split, otherwise it will merge all delta indices.
    /// If `num_indices_to_merge` is `Some(N)`, the delta updates and latest N indices
    /// will be merged into one single index.
    ///
    /// It is up to the caller to decide how many indices to merge / keep. Callers can
    /// find out how many indices there are by calling `Dataset::index_statistics`.
    ///
    /// A common usage pattern is for the caller to keep a large snapshot of the index
    /// of the base version, accumulate a few delta indices, and then merge them into
    /// the snapshot.
    pub num_indices_to_merge: Option<usize>,

    /// The index names to optimize. If `None`, all indices will be optimized.
    pub index_names: Option<Vec<String>>,

    /// Whether to retrain the whole index. Default: `false`.
    ///
    /// If `true`, the index will be retrained based on the current data,
    /// `num_indices_to_merge` will be ignored, and all indices will be merged into one.
    /// If `false`, the index will be optimized by merging `num_indices_to_merge` indices.
    ///
    /// This is useful when the data distribution has changed significantly,
    /// and we want to retrain the index to improve the search quality.
    /// This would be faster than re-creating the index from scratch.
    ///
    /// NOTE: this option is only supported for v3 vector indices.
    pub retrain: bool,

    /// Transaction properties to store with this commit. Default: `None`.
    ///
    /// These key-value pairs are stored in the transaction file
    /// and can be read later to identify the source of the commit
    /// (e.g., `job_id` for tracking completed index jobs).
    pub transaction_properties: Option<Arc<HashMap<String, String>>>,
}
47
48impl OptimizeOptions {
49    pub fn new() -> Self {
50        Self::default()
51    }
52
53    pub fn merge(num: usize) -> Self {
54        Self {
55            num_indices_to_merge: Some(num),
56            index_names: None,
57            ..Default::default()
58        }
59    }
60
61    pub fn append() -> Self {
62        Self {
63            num_indices_to_merge: Some(0),
64            index_names: None,
65            ..Default::default()
66        }
67    }
68
69    pub fn retrain() -> Self {
70        Self {
71            num_indices_to_merge: None,
72            index_names: None,
73            retrain: true,
74            ..Default::default()
75        }
76    }
77
78    pub fn num_indices_to_merge(mut self, num: Option<usize>) -> Self {
79        self.num_indices_to_merge = num;
80        self
81    }
82
83    pub fn index_names(mut self, names: Vec<String>) -> Self {
84        self.index_names = Some(names);
85        self
86    }
87
88    /// Set transaction properties to store in the commit manifest.
89    pub fn transaction_properties(mut self, properties: HashMap<String, String>) -> Self {
90        self.transaction_properties = Some(Arc::new(properties));
91        self
92    }
93}