1use std::{any::Any, sync::Arc};
13
14use crate::frag_reuse::FRAG_REUSE_INDEX_NAME;
15use crate::mem_wal::MEM_WAL_INDEX_NAME;
16use async_trait::async_trait;
17use deepsize::DeepSizeOf;
18use lance_core::{Error, Result};
19use roaring::RoaringBitmap;
20use serde::{Deserialize, Serialize};
21use std::convert::TryFrom;
22
// Public submodules of this crate.
pub mod frag_reuse;
pub mod mem_wal;
pub mod metrics;
pub mod optimize;
pub mod prefilter;
pub mod progress;
pub mod registry;
pub mod scalar;
pub mod traits;
pub mod vector;

// Re-export every public item of `traits` from the crate root.
pub use crate::traits::*;
35
/// File name `index.idx`.
// NOTE(review): by its name this is the main file of an index; the code that
// reads or writes it is outside this file.
pub const INDEX_FILE_NAME: &str = "index.idx";
/// File name `auxiliary.idx`.
// NOTE(review): what is stored in the auxiliary file is not visible from here.
pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
/// Metadata key `lance:index`.
// presumably the schema-metadata key under which index metadata is stored --
// confirm against the code that writes it.
pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";

/// Version reported by [`IndexType::version`] for every vector index type
/// except `IvfRq`.
pub const VECTOR_INDEX_VERSION: u32 = 1;
/// Version reported by [`IndexType::version`] for `IvfRq`.
pub const IVF_RQ_INDEX_VERSION: u32 = 2;

// NOTE(review): the two constants below are not referenced in this file. By
// their names they bound partition sizes (a multiplier and a percentage,
// presumably relative to `IndexType::target_partition_size`) -- confirm
// against the call sites.
pub const MAX_PARTITION_SIZE_FACTOR: usize = 4;
pub const MIN_PARTITION_SIZE_PERCENT: usize = 25;
60
/// Code included verbatim from `lance.index.pb.rs` in the build's `OUT_DIR`.
///
/// The included file is produced at build time (presumably generated from
/// protobuf definitions by the crate's build script -- confirm against
/// `build.rs`).
pub mod pb {
    // The included code is not hand-written, so this lint is silenced for it.
    #![allow(clippy::use_self)]
    include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
}
65
/// Code included verbatim from `lance.table.rs` in the build's `OUT_DIR`.
///
/// NOTE(review): the module name suggests these are older definitions kept
/// for compatibility -- confirm against the build script and the users of
/// this module.
pub mod pbold {
    // The included code is not hand-written, so this lint is silenced for it.
    #![allow(clippy::use_self)]
    include!(concat!(env!("OUT_DIR"), "/lance.table.rs"));
}
70
/// Base interface of an index, usable as a trait object (`dyn Index`).
///
/// Implementors must be `Send + Sync` and implement [`DeepSizeOf`].
/// `#[async_trait]` is applied so that the trait can declare `async` methods.
#[async_trait]
pub trait Index: Send + Sync + DeepSizeOf {
    /// Returns `self` as `&dyn Any`, so callers can attempt a downcast to the
    /// concrete index type.
    fn as_any(&self) -> &dyn Any;

    /// Converts a shared handle to this index into an `Arc<dyn Index>`.
    fn as_index(self: Arc<Self>) -> Arc<dyn Index>;

    /// Converts a shared handle to this index into an
    /// `Arc<dyn vector::VectorIndex>`.
    ///
    /// # Errors
    /// Presumably fails when the implementor is not a vector index -- confirm
    /// against the implementors.
    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;

    /// Returns statistics about this index as a JSON value.
    // NOTE(review): the layout of the JSON document is not fixed by this trait.
    fn statistics(&self) -> Result<serde_json::Value>;

    /// Prewarms the index.
    // NOTE(review): presumably loads index data ahead of its first use; what
    // is loaded is up to the implementor -- confirm.
    async fn prewarm(&self) -> Result<()>;

    /// Returns the [`IndexType`] of this index.
    fn index_type(&self) -> IndexType;

    /// Computes the set of included fragments as a bitmap.
    // presumably the ids of the dataset fragments covered by this index --
    // TODO confirm against the implementors.
    async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
}
101
/// Identifies the kind of an index.
///
/// Every variant has an explicit discriminant because the enum is converted
/// to and from `i32` (see the `TryFrom<i32>` implementation). Scalar and
/// system index types use the values 0 to 10; vector index types start at 100.
#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
pub enum IndexType {
    // Scalar index types (see `is_scalar`). `Scalar` is displayed as "BTree".
    Scalar = 0,
    BTree = 1,
    Bitmap = 2,
    LabelList = 3,
    Inverted = 4,
    NGram = 5,

    // System index types (see `is_system`).
    FragmentReuse = 6,

    MemWal = 7,

    // More scalar index types (see `is_scalar`).
    ZoneMap = 8,
    BloomFilter = 9,
    RTree = 10,

    // Vector index types (see `is_vector`). `Vector` is displayed as "IVF_PQ".
    Vector = 100,
    IvfFlat = 101,
    IvfSq = 102,
    IvfPq = 103,
    IvfHnswSq = 104,
    IvfHnswPq = 105,
    IvfHnswFlat = 106,
    IvfRq = 107,
}
139
140impl std::fmt::Display for IndexType {
141 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
142 match self {
143 Self::Scalar | Self::BTree => write!(f, "BTree"),
144 Self::Bitmap => write!(f, "Bitmap"),
145 Self::LabelList => write!(f, "LabelList"),
146 Self::Inverted => write!(f, "Inverted"),
147 Self::NGram => write!(f, "NGram"),
148 Self::FragmentReuse => write!(f, "FragmentReuse"),
149 Self::MemWal => write!(f, "MemWal"),
150 Self::ZoneMap => write!(f, "ZoneMap"),
151 Self::BloomFilter => write!(f, "BloomFilter"),
152 Self::RTree => write!(f, "RTree"),
153 Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
154 Self::IvfFlat => write!(f, "IVF_FLAT"),
155 Self::IvfSq => write!(f, "IVF_SQ"),
156 Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
157 Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
158 Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
159 Self::IvfRq => write!(f, "IVF_RQ"),
160 }
161 }
162}
163
164impl TryFrom<i32> for IndexType {
165 type Error = Error;
166
167 fn try_from(value: i32) -> Result<Self> {
168 match value {
169 v if v == Self::Scalar as i32 => Ok(Self::Scalar),
170 v if v == Self::BTree as i32 => Ok(Self::BTree),
171 v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
172 v if v == Self::LabelList as i32 => Ok(Self::LabelList),
173 v if v == Self::NGram as i32 => Ok(Self::NGram),
174 v if v == Self::Inverted as i32 => Ok(Self::Inverted),
175 v if v == Self::FragmentReuse as i32 => Ok(Self::FragmentReuse),
176 v if v == Self::MemWal as i32 => Ok(Self::MemWal),
177 v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
178 v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
179 v if v == Self::Vector as i32 => Ok(Self::Vector),
180 v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
181 v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
182 v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
183 v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
184 v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
185 v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
186 v if v == Self::IvfRq as i32 => Ok(Self::IvfRq),
187 _ => Err(Error::invalid_input_source(
188 format!("the input value {} is not a valid IndexType", value).into(),
189 )),
190 }
191 }
192}
193
194impl TryFrom<&str> for IndexType {
195 type Error = Error;
196
197 fn try_from(value: &str) -> Result<Self> {
198 match value {
199 "BTree" | "BTREE" => Ok(Self::BTree),
200 "Bitmap" | "BITMAP" => Ok(Self::Bitmap),
201 "LabelList" | "LABELLIST" => Ok(Self::LabelList),
202 "Inverted" | "INVERTED" => Ok(Self::Inverted),
203 "NGram" | "NGRAM" => Ok(Self::NGram),
204 "ZoneMap" | "ZONEMAP" => Ok(Self::ZoneMap),
205 "Vector" | "VECTOR" => Ok(Self::Vector),
206 "IVF_FLAT" => Ok(Self::IvfFlat),
207 "IVF_SQ" => Ok(Self::IvfSq),
208 "IVF_PQ" => Ok(Self::IvfPq),
209 "IVF_RQ" => Ok(Self::IvfRq),
210 "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
211 "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
212 "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
213 "FragmentReuse" => Ok(Self::FragmentReuse),
214 "MemWal" => Ok(Self::MemWal),
215 _ => Err(Error::invalid_input(format!(
216 "invalid index type: {}",
217 value
218 ))),
219 }
220 }
221}
222
223impl IndexType {
224 pub fn is_scalar(&self) -> bool {
225 matches!(
226 self,
227 Self::Scalar
228 | Self::BTree
229 | Self::Bitmap
230 | Self::LabelList
231 | Self::Inverted
232 | Self::NGram
233 | Self::ZoneMap
234 | Self::BloomFilter
235 | Self::RTree,
236 )
237 }
238
239 pub fn is_vector(&self) -> bool {
240 matches!(
241 self,
242 Self::Vector
243 | Self::IvfPq
244 | Self::IvfHnswSq
245 | Self::IvfHnswPq
246 | Self::IvfHnswFlat
247 | Self::IvfFlat
248 | Self::IvfSq
249 | Self::IvfRq
250 )
251 }
252
253 pub fn is_system(&self) -> bool {
254 matches!(self, Self::FragmentReuse | Self::MemWal)
255 }
256
257 pub fn version(&self) -> i32 {
263 match self {
264 Self::Scalar => 0,
265 Self::BTree => 0,
266 Self::Bitmap => 0,
267 Self::LabelList => 0,
268 Self::Inverted => 0,
269 Self::NGram => 0,
270 Self::FragmentReuse => 0,
271 Self::MemWal => 0,
272 Self::ZoneMap => 0,
273 Self::BloomFilter => 0,
274 Self::RTree => 0,
275
276 Self::Vector
283 | Self::IvfFlat
284 | Self::IvfSq
285 | Self::IvfPq
286 | Self::IvfHnswSq
287 | Self::IvfHnswPq
288 | Self::IvfHnswFlat => VECTOR_INDEX_VERSION as i32,
289 Self::IvfRq => IVF_RQ_INDEX_VERSION as i32,
290 }
291 }
292
293 pub fn target_partition_size(&self) -> usize {
300 match self {
301 Self::Vector => 8192,
302 Self::IvfFlat => 4096,
303 Self::IvfSq => 8192,
304 Self::IvfPq => 8192,
305 Self::IvfHnswFlat => 1 << 20,
306 Self::IvfHnswSq => 1 << 20,
307 Self::IvfHnswPq => 1 << 20,
308 _ => 8192,
309 }
310 }
311
312 pub fn max_vector_version() -> u32 {
314 [
315 Self::Vector,
316 Self::IvfFlat,
317 Self::IvfSq,
318 Self::IvfPq,
319 Self::IvfHnswSq,
320 Self::IvfHnswPq,
321 Self::IvfHnswFlat,
322 Self::IvfRq,
323 ]
324 .into_iter()
325 .map(|index_type| index_type.version() as u32)
326 .max()
327 .unwrap_or(VECTOR_INDEX_VERSION)
328 }
329}
330
/// Common interface of index parameter types.
///
/// Implementors must be `Send + Sync`.
pub trait IndexParams: Send + Sync {
    /// Returns `self` as `&dyn Any`, so callers can attempt a downcast to the
    /// concrete parameter type.
    fn as_any(&self) -> &dyn Any;

    /// Returns the index name associated with these parameters.
    // NOTE(review): whether this is a user-chosen name or the name of the
    // index kind is not visible here -- confirm against the implementors.
    fn index_name(&self) -> &str;
}
336
/// Index metadata record that can be serialized and deserialized with serde.
// NOTE(review): presumably the value stored under `INDEX_METADATA_SCHEMA_KEY`
// -- confirm against the code that writes it.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexMetadata {
    /// Index type as a string; serialized under the key `type`.
    #[serde(rename = "type")]
    pub index_type: String,
    /// Distance type as a string.
    // NOTE(review): the set of accepted values is not visible in this file.
    pub distance_type: String,
}
343
344pub fn is_system_index(index_meta: &lance_table::format::IndexMetadata) -> bool {
345 index_meta.name == FRAG_REUSE_INDEX_NAME || index_meta.name == MEM_WAL_INDEX_NAME
346}
347
348pub fn infer_system_index_type(
349 index_meta: &lance_table::format::IndexMetadata,
350) -> Option<IndexType> {
351 if index_meta.name == FRAG_REUSE_INDEX_NAME {
352 Some(IndexType::FragmentReuse)
353 } else if index_meta.name == MEM_WAL_INDEX_NAME {
354 Some(IndexType::MemWal)
355 } else {
356 None
357 }
358}
359
#[cfg(test)]
mod tests {
    use super::*;

    // `IvfRq` reports its own version, which is newer than the one `IvfPq` reports.
    #[test]
    fn test_ivf_rq_has_dedicated_index_version() {
        let rq_version = IndexType::IvfRq.version();
        let pq_version = IndexType::IvfPq.version();

        assert!(rq_version > pq_version);
        assert_eq!(rq_version as u32, IVF_RQ_INDEX_VERSION);
    }

    // The maximum over all vector types is the `IvfRq` version.
    #[test]
    fn test_max_vector_version_tracks_highest_supported() {
        let highest = IndexType::max_vector_version();

        assert_eq!(highest, IVF_RQ_INDEX_VERSION);
    }
}