Skip to main content

lance_index/
optimize.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::collections::HashMap;
5use std::sync::Arc;
6
7use crate::progress::{IndexBuildProgress, noop_progress};
8
9/// Options for optimizing all indices.
10#[non_exhaustive]
11#[derive(Debug, Clone)]
12pub struct OptimizeOptions {
13    /// Number of delta indices to merge for one column. Default: 1.
14    ///
15    /// If `num_indices_to_merge` is None, lance will create a new delta index if no partition is split, otherwise it will merge all delta indices.
16    /// If `num_indices_to_merge` is Some(N), the delta updates and latest N indices
17    /// will be merged into one single index.
18    ///
19    /// It is up to the caller to decide how many indices to merge / keep. Callers can
20    /// find out how many indices are there by calling `Dataset::index_statistics`.
21    ///
22    /// A common usage pattern will be that, the caller can keep a large snapshot of the index of the base version,
23    /// and accumulate a few delta indices, then merge them into the snapshot.
24    pub num_indices_to_merge: Option<usize>,
25
26    /// the index names to optimize. If None, all indices will be optimized.
27    pub index_names: Option<Vec<String>>,
28
29    /// whether to retrain the whole index. Default: false.
30    ///
31    /// If true, the index will be retrained based on the current data,
32    /// `num_indices_to_merge` will be ignored, and all indices will be merged into one.
33    /// If false, the index will be optimized by merging `num_indices_to_merge` indices.
34    ///
35    /// This is useful when the data distribution has changed significantly,
36    /// and we want to retrain the index to improve the search quality.
37    /// This would be faster than re-create the index from scratch.
38    ///
39    /// NOTE: this option is only supported for v3 vector indices.
40    pub retrain: bool,
41
42    /// Transaction properties to store with this commit.
43    ///
44    /// These key-value pairs are stored in the transaction file
45    /// and can be read later to identify the source of the commit
46    /// (e.g., job_id for tracking completed index jobs).
47    pub transaction_properties: Option<Arc<HashMap<String, String>>>,
48
49    /// Progress callback for index building during optimization.
50    pub progress: Arc<dyn IndexBuildProgress>,
51}
52
53impl Default for OptimizeOptions {
54    fn default() -> Self {
55        Self {
56            num_indices_to_merge: None,
57            index_names: None,
58            retrain: false,
59            transaction_properties: None,
60            progress: noop_progress(),
61        }
62    }
63}
64
65impl OptimizeOptions {
66    pub fn new() -> Self {
67        Self::default()
68    }
69
70    pub fn merge(num: usize) -> Self {
71        Self {
72            num_indices_to_merge: Some(num),
73            index_names: None,
74            ..Default::default()
75        }
76    }
77
78    pub fn append() -> Self {
79        Self {
80            num_indices_to_merge: Some(0),
81            index_names: None,
82            ..Default::default()
83        }
84    }
85
86    pub fn retrain() -> Self {
87        Self {
88            num_indices_to_merge: None,
89            index_names: None,
90            retrain: true,
91            ..Default::default()
92        }
93    }
94
95    pub fn num_indices_to_merge(mut self, num: Option<usize>) -> Self {
96        self.num_indices_to_merge = num;
97        self
98    }
99
100    pub fn index_names(mut self, names: Vec<String>) -> Self {
101        self.index_names = Some(names);
102        self
103    }
104
105    /// Set transaction properties to store in the commit manifest.
106    pub fn transaction_properties(mut self, properties: HashMap<String, String>) -> Self {
107        self.transaction_properties = Some(Arc::new(properties));
108        self
109    }
110
111    /// Set progress callback for index building during optimization.
112    pub fn progress(mut self, progress: Arc<dyn IndexBuildProgress>) -> Self {
113        self.progress = progress;
114        self
115    }
116}