lance_index/optimize.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::collections::HashMap;
5use std::sync::Arc;
6
/// Options for optimizing all indices.
///
/// Every field is optional in the sense that the derived [`Default`] yields a
/// usable value: `None` for the `Option` fields and `false` for `retrain`.
/// The type is `#[non_exhaustive]`, so code outside this crate cannot build it
/// with a struct literal and should use the constructors and builder methods.
#[non_exhaustive]
#[derive(Debug, Clone, Default)]
pub struct OptimizeOptions {
    /// Number of delta indices to merge for one column. Default: `None`
    /// (the value produced by the derived `Default`).
    ///
    /// If `num_indices_to_merge` is `None`, lance will create a new delta index if no
    /// partition is split, otherwise it will merge all delta indices.
    /// If `num_indices_to_merge` is `Some(N)`, the delta updates and the latest N indices
    /// will be merged into one single index.
    ///
    /// It is up to the caller to decide how many indices to merge / keep. Callers can
    /// find out how many indices there are by calling `Dataset::index_statistics`.
    ///
    /// A common usage pattern is for the caller to keep a large snapshot of the index of
    /// the base version, accumulate a few delta indices, and then merge them into the
    /// snapshot.
    pub num_indices_to_merge: Option<usize>,

    /// The names of the indices to optimize. If `None`, all indices will be optimized.
    pub index_names: Option<Vec<String>>,

    /// Whether to retrain the whole index. Default: `false`.
    ///
    /// If true, the index will be retrained based on the current data,
    /// `num_indices_to_merge` will be ignored, and all indices will be merged into one.
    /// If false, the index will be optimized by merging `num_indices_to_merge` indices.
    ///
    /// This is useful when the data distribution has changed significantly,
    /// and we want to retrain the index to improve the search quality.
    /// This would be faster than re-creating the index from scratch.
    ///
    /// NOTE: this option is only supported for v3 vector indices.
    pub retrain: bool,

    /// Transaction properties to store with this commit. Default: `None`.
    ///
    /// These key-value pairs are stored in the transaction file
    /// and can be read later to identify the source of the commit
    /// (e.g., job_id for tracking completed index jobs).
    ///
    /// The map is held behind an `Arc`, so cloning the options does not copy the map.
    pub transaction_properties: Option<Arc<HashMap<String, String>>>,
}
47
48impl OptimizeOptions {
49 pub fn new() -> Self {
50 Self::default()
51 }
52
53 pub fn merge(num: usize) -> Self {
54 Self {
55 num_indices_to_merge: Some(num),
56 index_names: None,
57 ..Default::default()
58 }
59 }
60
61 pub fn append() -> Self {
62 Self {
63 num_indices_to_merge: Some(0),
64 index_names: None,
65 ..Default::default()
66 }
67 }
68
69 pub fn retrain() -> Self {
70 Self {
71 num_indices_to_merge: None,
72 index_names: None,
73 retrain: true,
74 ..Default::default()
75 }
76 }
77
78 pub fn num_indices_to_merge(mut self, num: Option<usize>) -> Self {
79 self.num_indices_to_merge = num;
80 self
81 }
82
83 pub fn index_names(mut self, names: Vec<String>) -> Self {
84 self.index_names = Some(names);
85 self
86 }
87
88 /// Set transaction properties to store in the commit manifest.
89 pub fn transaction_properties(mut self, properties: HashMap<String, String>) -> Self {
90 self.transaction_properties = Some(Arc::new(properties));
91 self
92 }
93}