lance_index/optimize.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::collections::HashMap;
5use std::sync::Arc;
6
7use crate::progress::{IndexBuildProgress, noop_progress};
8
/// Options for optimizing all indices.
///
/// Construct with [`OptimizeOptions::default`] or one of the constructors on this type, then
/// adjust individual settings through the builder-style methods.
#[non_exhaustive]
#[derive(Debug, Clone)]
pub struct OptimizeOptions {
    /// Number of delta indices to merge for one column. Default: `None`.
    ///
    /// If `num_indices_to_merge` is `None`, lance will create a new delta index if no partition
    /// is split, otherwise it will merge all delta indices.
    /// If `num_indices_to_merge` is `Some(N)`, the delta updates and latest N indices
    /// will be merged into one single index.
    ///
    /// It is up to the caller to decide how many indices to merge / keep. Callers can
    /// find out how many indices there are by calling `Dataset::index_statistics`.
    ///
    /// A common usage pattern is for the caller to keep a large snapshot of the index of the
    /// base version, accumulate a few delta indices, and then merge them into the snapshot.
    pub num_indices_to_merge: Option<usize>,

    /// The index names to optimize. If `None`, all indices will be optimized. Default: `None`.
    pub index_names: Option<Vec<String>>,

    /// Whether to retrain the whole index. Default: `false`.
    ///
    /// If true, the index will be retrained based on the current data,
    /// `num_indices_to_merge` will be ignored, and all indices will be merged into one.
    /// If false, the index will be optimized by merging `num_indices_to_merge` indices.
    ///
    /// This is useful when the data distribution has changed significantly,
    /// and we want to retrain the index to improve the search quality.
    /// This would be faster than re-creating the index from scratch.
    ///
    /// NOTE: this option is only supported for v3 vector indices.
    pub retrain: bool,

    /// Transaction properties to store with this commit. Default: `None`.
    ///
    /// These key-value pairs are stored in the transaction file
    /// and can be read later to identify the source of the commit
    /// (e.g., job_id for tracking completed index jobs).
    pub transaction_properties: Option<Arc<HashMap<String, String>>>,

    /// Progress callback for index building during optimization.
    ///
    /// Defaults to the callback returned by `noop_progress()`.
    pub progress: Arc<dyn IndexBuildProgress>,
}
52
53impl Default for OptimizeOptions {
54 fn default() -> Self {
55 Self {
56 num_indices_to_merge: None,
57 index_names: None,
58 retrain: false,
59 transaction_properties: None,
60 progress: noop_progress(),
61 }
62 }
63}
64
65impl OptimizeOptions {
66 pub fn new() -> Self {
67 Self::default()
68 }
69
70 pub fn merge(num: usize) -> Self {
71 Self {
72 num_indices_to_merge: Some(num),
73 index_names: None,
74 ..Default::default()
75 }
76 }
77
78 pub fn append() -> Self {
79 Self {
80 num_indices_to_merge: Some(0),
81 index_names: None,
82 ..Default::default()
83 }
84 }
85
86 pub fn retrain() -> Self {
87 Self {
88 num_indices_to_merge: None,
89 index_names: None,
90 retrain: true,
91 ..Default::default()
92 }
93 }
94
95 pub fn num_indices_to_merge(mut self, num: Option<usize>) -> Self {
96 self.num_indices_to_merge = num;
97 self
98 }
99
100 pub fn index_names(mut self, names: Vec<String>) -> Self {
101 self.index_names = Some(names);
102 self
103 }
104
105 /// Set transaction properties to store in the commit manifest.
106 pub fn transaction_properties(mut self, properties: HashMap<String, String>) -> Self {
107 self.transaction_properties = Some(Arc::new(properties));
108 self
109 }
110
111 /// Set progress callback for index building during optimization.
112 pub fn progress(mut self, progress: Arc<dyn IndexBuildProgress>) -> Self {
113 self.progress = progress;
114 self
115 }
116}