lance_index/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Lance secondary index library
5//!
6//! <section class="warning">
//! This is an internal crate used by <a href="https://github.com/lancedb/lance">the lance project</a>.
8//! <br/>
9//! API stability is not guaranteed.
10//! </section>
11
12use std::{any::Any, sync::Arc};
13
14use async_trait::async_trait;
15use deepsize::DeepSizeOf;
16use lance_core::{Error, Result};
17use roaring::RoaringBitmap;
18use serde::{Deserialize, Serialize};
19use snafu::location;
20use std::convert::TryFrom;
21
22pub mod frag_reuse;
23pub mod metrics;
24pub mod optimize;
25pub mod prefilter;
26pub mod scalar;
27pub mod traits;
28pub mod vector;
29
30pub use crate::traits::*;
31
/// The name of the index file.
pub const INDEX_FILE_NAME: &str = "index.idx";
/// The name of the auxiliary index file.
///
/// The auxiliary file holds additional information about the index and exists to
/// improve performance.
/// - For an 'IVF_HNSW' index, it stores the partitioned PQ storage.
pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
/// The `lance:index` metadata key.
// NOTE(review): presumably the schema-metadata key under which index metadata is
// stored; confirm against the readers and writers that use it.
pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
39
40pub mod pb {
41    #![allow(clippy::use_self)]
42    include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
43}
44
45/// Generic methods common across all types of secondary indices
46///
47#[async_trait]
48pub trait Index: Send + Sync + DeepSizeOf {
49    /// Cast to [Any].
50    fn as_any(&self) -> &dyn Any;
51
52    /// Cast to [Index]
53    fn as_index(self: Arc<Self>) -> Arc<dyn Index>;
54
55    /// Cast to [vector::VectorIndex]
56    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;
57
58    /// Retrieve index statistics as a JSON Value
59    fn statistics(&self) -> Result<serde_json::Value>;
60
61    /// Prewarm the index.
62    ///
63    /// This will load the index into memory and cache it.
64    async fn prewarm(&self) -> Result<()>;
65
66    /// Get the type of the index
67    fn index_type(&self) -> IndexType;
68
69    /// Read through the index and determine which fragment ids are covered by the index
70    ///
71    /// This is a kind of slow operation.  It's better to use the fragment_bitmap.  This
72    /// only exists for cases where the fragment_bitmap has become corrupted or missing.
73    async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
74}
75
76/// Index Type
77#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
78pub enum IndexType {
79    // Preserve 0-100 for simple indices.
80    Scalar = 0, // Legacy scalar index, alias to BTree
81
82    BTree = 1, // BTree
83
84    Bitmap = 2, // Bitmap
85
86    LabelList = 3, // LabelList
87
88    Inverted = 4, // Inverted
89
90    NGram = 5, // NGram
91
92    FragmentReuse = 6,
93
94    // 100+ and up for vector index.
95    /// Flat vector index.
96    Vector = 100, // Legacy vector index, alias to IvfPq
97    IvfFlat = 101,
98    IvfSq = 102,
99    IvfPq = 103,
100    IvfHnswSq = 104,
101    IvfHnswPq = 105,
102    IvfHnswFlat = 106,
103}
104
105impl std::fmt::Display for IndexType {
106    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
107        match self {
108            Self::Scalar | Self::BTree => write!(f, "BTree"),
109            Self::Bitmap => write!(f, "Bitmap"),
110            Self::LabelList => write!(f, "LabelList"),
111            Self::Inverted => write!(f, "Inverted"),
112            Self::NGram => write!(f, "NGram"),
113            Self::FragmentReuse => write!(f, "FragmentReuse"),
114            Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
115            Self::IvfFlat => write!(f, "IVF_FLAT"),
116            Self::IvfSq => write!(f, "IVF_SQ"),
117            Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
118            Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
119            Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
120        }
121    }
122}
123
124impl TryFrom<i32> for IndexType {
125    type Error = Error;
126
127    fn try_from(value: i32) -> Result<Self> {
128        match value {
129            v if v == Self::Scalar as i32 => Ok(Self::Scalar),
130            v if v == Self::BTree as i32 => Ok(Self::BTree),
131            v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
132            v if v == Self::LabelList as i32 => Ok(Self::LabelList),
133            v if v == Self::NGram as i32 => Ok(Self::NGram),
134            v if v == Self::Inverted as i32 => Ok(Self::Inverted),
135            v if v == Self::Vector as i32 => Ok(Self::Vector),
136            v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
137            v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
138            v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
139            v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
140            v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
141            v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
142            _ => Err(Error::InvalidInput {
143                source: format!("the input value {} is not a valid IndexType", value).into(),
144                location: location!(),
145            }),
146        }
147    }
148}
149
150impl IndexType {
151    pub fn is_scalar(&self) -> bool {
152        matches!(
153            self,
154            Self::Scalar
155                | Self::BTree
156                | Self::Bitmap
157                | Self::LabelList
158                | Self::Inverted
159                | Self::NGram
160        )
161    }
162
163    pub fn is_vector(&self) -> bool {
164        matches!(
165            self,
166            Self::Vector
167                | Self::IvfPq
168                | Self::IvfHnswSq
169                | Self::IvfHnswPq
170                | Self::IvfHnswFlat
171                | Self::IvfFlat
172                | Self::IvfSq
173        )
174    }
175
176    /// Returns the current format version of the index type,
177    /// bump this when the index format changes.
178    /// Indices which higher version than these will be ignored for compatibility,
179    /// This would happen when creating index in a newer version of Lance,
180    /// but then opening the index in older version of Lance
181    pub fn version(&self) -> i32 {
182        match self {
183            Self::Scalar => 0,
184            Self::BTree => 0,
185            Self::Bitmap => 0,
186            Self::LabelList => 0,
187            Self::Inverted => 0,
188            Self::NGram => 0,
189            Self::FragmentReuse => 0,
190
191            // for now all vector indices are built by the same builder,
192            // so they share the same version.
193            Self::Vector
194            | Self::IvfFlat
195            | Self::IvfSq
196            | Self::IvfPq
197            | Self::IvfHnswSq
198            | Self::IvfHnswPq
199            | Self::IvfHnswFlat => 1,
200        }
201    }
202}
203
204pub trait IndexParams: Send + Sync {
205    fn as_any(&self) -> &dyn Any;
206
207    fn index_type(&self) -> IndexType;
208
209    fn index_name(&self) -> &str;
210}
211
212#[derive(Serialize, Deserialize, Debug)]
213pub struct IndexMetadata {
214    #[serde(rename = "type")]
215    pub index_type: String,
216    pub distance_type: String,
217}