1use std::{any::Any, sync::Arc};
13
14use crate::frag_reuse::FRAG_REUSE_INDEX_NAME;
15use crate::mem_wal::MEM_WAL_INDEX_NAME;
16use async_trait::async_trait;
17use deepsize::DeepSizeOf;
18use lance_core::{Error, Result};
19use roaring::RoaringBitmap;
20use serde::{Deserialize, Serialize};
21use std::convert::TryFrom;
22
23pub mod frag_reuse;
24pub mod mem_wal;
25pub mod metrics;
26pub mod optimize;
27pub mod prefilter;
28pub mod progress;
29pub mod registry;
30pub mod scalar;
31pub mod traits;
32pub mod vector;
33
34pub use crate::traits::*;
35
36pub const INDEX_FILE_NAME: &str = "index.idx";
37pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
42pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
43
44pub const VECTOR_INDEX_VERSION: u32 = 1;
49pub const IVF_RQ_INDEX_VERSION: u32 = 2;
51
52pub const MAX_PARTITION_SIZE_FACTOR: usize = 4;
59pub const MIN_PARTITION_SIZE_PERCENT: usize = 25;
60
61pub mod pb {
62 #![allow(clippy::use_self)]
63 include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
64}
65
66pub mod pbold {
67 #![allow(clippy::use_self)]
68 include!(concat!(env!("OUT_DIR"), "/lance.table.rs"));
69}
70
71#[async_trait]
74pub trait Index: Send + Sync + DeepSizeOf {
75 fn as_any(&self) -> &dyn Any;
77
78 fn as_index(self: Arc<Self>) -> Arc<dyn Index>;
80
81 fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;
83
84 fn statistics(&self) -> Result<serde_json::Value>;
86
87 async fn prewarm(&self) -> Result<()>;
91
92 fn index_type(&self) -> IndexType;
94
95 async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
100}
101
102#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
104pub enum IndexType {
105 Scalar = 0, BTree = 1, Bitmap = 2, LabelList = 3, Inverted = 4, NGram = 5, FragmentReuse = 6,
119
120 MemWal = 7,
121
122 ZoneMap = 8, BloomFilter = 9, RTree = 10, Vector = 100, IvfFlat = 101,
132 IvfSq = 102,
133 IvfPq = 103,
134 IvfHnswSq = 104,
135 IvfHnswPq = 105,
136 IvfHnswFlat = 106,
137 IvfRq = 107,
138}
139
140impl std::fmt::Display for IndexType {
141 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
142 match self {
143 Self::Scalar | Self::BTree => write!(f, "BTree"),
144 Self::Bitmap => write!(f, "Bitmap"),
145 Self::LabelList => write!(f, "LabelList"),
146 Self::Inverted => write!(f, "Inverted"),
147 Self::NGram => write!(f, "NGram"),
148 Self::FragmentReuse => write!(f, "FragmentReuse"),
149 Self::MemWal => write!(f, "MemWal"),
150 Self::ZoneMap => write!(f, "ZoneMap"),
151 Self::BloomFilter => write!(f, "BloomFilter"),
152 Self::RTree => write!(f, "RTree"),
153 Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
154 Self::IvfFlat => write!(f, "IVF_FLAT"),
155 Self::IvfSq => write!(f, "IVF_SQ"),
156 Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
157 Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
158 Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
159 Self::IvfRq => write!(f, "IVF_RQ"),
160 }
161 }
162}
163
164impl TryFrom<i32> for IndexType {
165 type Error = Error;
166
167 fn try_from(value: i32) -> Result<Self> {
168 match value {
169 v if v == Self::Scalar as i32 => Ok(Self::Scalar),
170 v if v == Self::BTree as i32 => Ok(Self::BTree),
171 v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
172 v if v == Self::LabelList as i32 => Ok(Self::LabelList),
173 v if v == Self::NGram as i32 => Ok(Self::NGram),
174 v if v == Self::Inverted as i32 => Ok(Self::Inverted),
175 v if v == Self::FragmentReuse as i32 => Ok(Self::FragmentReuse),
176 v if v == Self::MemWal as i32 => Ok(Self::MemWal),
177 v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
178 v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
179 v if v == Self::RTree as i32 => Ok(Self::RTree),
180 v if v == Self::Vector as i32 => Ok(Self::Vector),
181 v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
182 v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
183 v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
184 v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
185 v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
186 v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
187 v if v == Self::IvfRq as i32 => Ok(Self::IvfRq),
188 _ => Err(Error::invalid_input_source(
189 format!("the input value {} is not a valid IndexType", value).into(),
190 )),
191 }
192 }
193}
194
195impl TryFrom<&str> for IndexType {
196 type Error = Error;
197
198 fn try_from(value: &str) -> Result<Self> {
199 match value {
200 "BTree" | "BTREE" => Ok(Self::BTree),
201 "Bitmap" | "BITMAP" => Ok(Self::Bitmap),
202 "LabelList" | "LABELLIST" => Ok(Self::LabelList),
203 "Inverted" | "INVERTED" => Ok(Self::Inverted),
204 "NGram" | "NGRAM" => Ok(Self::NGram),
205 "ZoneMap" | "ZONEMAP" => Ok(Self::ZoneMap),
206 "BloomFilter" | "BLOOMFILTER" | "BLOOM_FILTER" => Ok(Self::BloomFilter),
207 "RTree" | "RTREE" | "R_TREE" => Ok(Self::RTree),
208 "Vector" | "VECTOR" => Ok(Self::Vector),
209 "IVF_FLAT" => Ok(Self::IvfFlat),
210 "IVF_SQ" => Ok(Self::IvfSq),
211 "IVF_PQ" => Ok(Self::IvfPq),
212 "IVF_RQ" => Ok(Self::IvfRq),
213 "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
214 "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
215 "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
216 "FragmentReuse" => Ok(Self::FragmentReuse),
217 "MemWal" => Ok(Self::MemWal),
218 _ => Err(Error::invalid_input(format!(
219 "invalid index type: {}",
220 value
221 ))),
222 }
223 }
224}
225
226impl IndexType {
227 pub fn is_scalar(&self) -> bool {
228 matches!(
229 self,
230 Self::Scalar
231 | Self::BTree
232 | Self::Bitmap
233 | Self::LabelList
234 | Self::Inverted
235 | Self::NGram
236 | Self::ZoneMap
237 | Self::BloomFilter
238 | Self::RTree,
239 )
240 }
241
242 pub fn is_vector(&self) -> bool {
243 matches!(
244 self,
245 Self::Vector
246 | Self::IvfPq
247 | Self::IvfHnswSq
248 | Self::IvfHnswPq
249 | Self::IvfHnswFlat
250 | Self::IvfFlat
251 | Self::IvfSq
252 | Self::IvfRq
253 )
254 }
255
256 pub fn is_system(&self) -> bool {
257 matches!(self, Self::FragmentReuse | Self::MemWal)
258 }
259
260 pub fn version(&self) -> i32 {
266 match self {
267 Self::Scalar => 0,
268 Self::BTree => 0,
269 Self::Bitmap => 0,
270 Self::LabelList => 0,
271 Self::Inverted => 0,
272 Self::NGram => 0,
273 Self::FragmentReuse => 0,
274 Self::MemWal => 0,
275 Self::ZoneMap => 0,
276 Self::BloomFilter => 0,
277 Self::RTree => 0,
278
279 Self::Vector
286 | Self::IvfFlat
287 | Self::IvfSq
288 | Self::IvfPq
289 | Self::IvfHnswSq
290 | Self::IvfHnswPq
291 | Self::IvfHnswFlat => VECTOR_INDEX_VERSION as i32,
292 Self::IvfRq => IVF_RQ_INDEX_VERSION as i32,
293 }
294 }
295
296 pub fn target_partition_size(&self) -> usize {
303 match self {
304 Self::Vector => 8192,
305 Self::IvfFlat => 4096,
306 Self::IvfSq => 8192,
307 Self::IvfPq => 8192,
308 Self::IvfHnswFlat => 1 << 20,
309 Self::IvfHnswSq => 1 << 20,
310 Self::IvfHnswPq => 1 << 20,
311 _ => 8192,
312 }
313 }
314
315 pub fn max_vector_version() -> u32 {
317 [
318 Self::Vector,
319 Self::IvfFlat,
320 Self::IvfSq,
321 Self::IvfPq,
322 Self::IvfHnswSq,
323 Self::IvfHnswPq,
324 Self::IvfHnswFlat,
325 Self::IvfRq,
326 ]
327 .into_iter()
328 .map(|index_type| index_type.version() as u32)
329 .max()
330 .unwrap_or(VECTOR_INDEX_VERSION)
331 }
332}
333
334pub trait IndexParams: Send + Sync {
335 fn as_any(&self) -> &dyn Any;
336
337 fn index_name(&self) -> &str;
338}
339
340#[derive(Serialize, Deserialize, Debug)]
341pub struct IndexMetadata {
342 #[serde(rename = "type")]
343 pub index_type: String,
344 pub distance_type: String,
345}
346
347pub fn is_system_index(index_meta: &lance_table::format::IndexMetadata) -> bool {
348 index_meta.name == FRAG_REUSE_INDEX_NAME || index_meta.name == MEM_WAL_INDEX_NAME
349}
350
351pub fn infer_system_index_type(
352 index_meta: &lance_table::format::IndexMetadata,
353) -> Option<IndexType> {
354 if index_meta.name == FRAG_REUSE_INDEX_NAME {
355 Some(IndexType::FragmentReuse)
356 } else if index_meta.name == MEM_WAL_INDEX_NAME {
357 Some(IndexType::MemWal)
358 } else {
359 None
360 }
361}
362
363#[cfg(test)]
364mod tests {
365 use super::*;
366
367 #[test]
368 fn test_ivf_rq_has_dedicated_index_version() {
369 assert!(IndexType::IvfRq.version() > IndexType::IvfPq.version());
370 assert_eq!(IndexType::IvfRq.version() as u32, IVF_RQ_INDEX_VERSION);
371 }
372
373 #[test]
374 fn test_max_vector_version_tracks_highest_supported() {
375 assert_eq!(IndexType::max_vector_version(), IVF_RQ_INDEX_VERSION);
376 }
377
378 #[test]
379 fn test_index_type_try_from_i32_covers_all_variants() {
380 let all = [
381 IndexType::Scalar,
382 IndexType::BTree,
383 IndexType::Bitmap,
384 IndexType::LabelList,
385 IndexType::Inverted,
386 IndexType::NGram,
387 IndexType::FragmentReuse,
388 IndexType::MemWal,
389 IndexType::ZoneMap,
390 IndexType::BloomFilter,
391 IndexType::RTree,
392 IndexType::Vector,
393 IndexType::IvfFlat,
394 IndexType::IvfSq,
395 IndexType::IvfPq,
396 IndexType::IvfHnswSq,
397 IndexType::IvfHnswPq,
398 IndexType::IvfHnswFlat,
399 IndexType::IvfRq,
400 ];
401
402 for index_type in all {
403 assert_eq!(
404 IndexType::try_from(index_type as i32).unwrap(),
405 index_type,
406 "IndexType::try_from(i32) should support {:?}",
407 index_type
408 );
409 }
410 }
411
412 #[test]
413 fn test_index_type_try_from_str_covers_all_parseable_variants() {
414 let cases = [
415 ("BTree", IndexType::BTree),
416 ("BTREE", IndexType::BTree),
417 ("Bitmap", IndexType::Bitmap),
418 ("BITMAP", IndexType::Bitmap),
419 ("LabelList", IndexType::LabelList),
420 ("LABELLIST", IndexType::LabelList),
421 ("Inverted", IndexType::Inverted),
422 ("INVERTED", IndexType::Inverted),
423 ("NGram", IndexType::NGram),
424 ("NGRAM", IndexType::NGram),
425 ("ZoneMap", IndexType::ZoneMap),
426 ("ZONEMAP", IndexType::ZoneMap),
427 ("BloomFilter", IndexType::BloomFilter),
428 ("BLOOMFILTER", IndexType::BloomFilter),
429 ("BLOOM_FILTER", IndexType::BloomFilter),
430 ("RTree", IndexType::RTree),
431 ("RTREE", IndexType::RTree),
432 ("R_TREE", IndexType::RTree),
433 ("Vector", IndexType::Vector),
434 ("VECTOR", IndexType::Vector),
435 ("IVF_FLAT", IndexType::IvfFlat),
436 ("IVF_SQ", IndexType::IvfSq),
437 ("IVF_PQ", IndexType::IvfPq),
438 ("IVF_RQ", IndexType::IvfRq),
439 ("IVF_HNSW_FLAT", IndexType::IvfHnswFlat),
440 ("IVF_HNSW_SQ", IndexType::IvfHnswSq),
441 ("IVF_HNSW_PQ", IndexType::IvfHnswPq),
442 ("FragmentReuse", IndexType::FragmentReuse),
443 ("MemWal", IndexType::MemWal),
444 ];
445
446 for (text, expected) in cases {
447 assert_eq!(
448 IndexType::try_from(text).unwrap(),
449 expected,
450 "IndexType::try_from(&str) should support '{text}'"
451 );
452 }
453 }
454}