1use std::{any::Any, sync::Arc};
13
14use crate::frag_reuse::FRAG_REUSE_INDEX_NAME;
15use crate::mem_wal::MEM_WAL_INDEX_NAME;
16use async_trait::async_trait;
17use deepsize::DeepSizeOf;
18use lance_core::{Error, Result};
19use roaring::RoaringBitmap;
20use serde::{Deserialize, Serialize};
21use snafu::location;
22use std::convert::TryFrom;
23
24pub mod frag_reuse;
25pub mod mem_wal;
26pub mod metrics;
27pub mod optimize;
28pub mod prefilter;
29pub mod scalar;
30pub mod traits;
31pub mod vector;
32
33pub use crate::traits::*;
34
35pub const INDEX_FILE_NAME: &str = "index.idx";
36pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
41pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
42
43pub const VECTOR_INDEX_VERSION: u32 = 1;
45
46pub mod pb {
47 #![allow(clippy::use_self)]
48 include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
49}
50
51pub mod pbold {
52 #![allow(clippy::use_self)]
53 include!(concat!(env!("OUT_DIR"), "/lance.table.rs"));
54}
55
56#[async_trait]
59pub trait Index: Send + Sync + DeepSizeOf {
60 fn as_any(&self) -> &dyn Any;
62
63 fn as_index(self: Arc<Self>) -> Arc<dyn Index>;
65
66 fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;
68
69 fn statistics(&self) -> Result<serde_json::Value>;
71
72 async fn prewarm(&self) -> Result<()>;
76
77 fn index_type(&self) -> IndexType;
79
80 async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
85}
86
87#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
89pub enum IndexType {
90 Scalar = 0, BTree = 1, Bitmap = 2, LabelList = 3, Inverted = 4, NGram = 5, FragmentReuse = 6,
104
105 MemWal = 7,
106
107 ZoneMap = 8, BloomFilter = 9, Vector = 100, IvfFlat = 101,
115 IvfSq = 102,
116 IvfPq = 103,
117 IvfHnswSq = 104,
118 IvfHnswPq = 105,
119 IvfHnswFlat = 106,
120 IvfRq = 107,
121}
122
123impl std::fmt::Display for IndexType {
124 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
125 match self {
126 Self::Scalar | Self::BTree => write!(f, "BTree"),
127 Self::Bitmap => write!(f, "Bitmap"),
128 Self::LabelList => write!(f, "LabelList"),
129 Self::Inverted => write!(f, "Inverted"),
130 Self::NGram => write!(f, "NGram"),
131 Self::FragmentReuse => write!(f, "FragmentReuse"),
132 Self::MemWal => write!(f, "MemWal"),
133 Self::ZoneMap => write!(f, "ZoneMap"),
134 Self::BloomFilter => write!(f, "BloomFilter"),
135 Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
136 Self::IvfFlat => write!(f, "IVF_FLAT"),
137 Self::IvfSq => write!(f, "IVF_SQ"),
138 Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
139 Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
140 Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
141 Self::IvfRq => write!(f, "IVF_RQ"),
142 }
143 }
144}
145
146impl TryFrom<i32> for IndexType {
147 type Error = Error;
148
149 fn try_from(value: i32) -> Result<Self> {
150 match value {
151 v if v == Self::Scalar as i32 => Ok(Self::Scalar),
152 v if v == Self::BTree as i32 => Ok(Self::BTree),
153 v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
154 v if v == Self::LabelList as i32 => Ok(Self::LabelList),
155 v if v == Self::NGram as i32 => Ok(Self::NGram),
156 v if v == Self::Inverted as i32 => Ok(Self::Inverted),
157 v if v == Self::FragmentReuse as i32 => Ok(Self::FragmentReuse),
158 v if v == Self::MemWal as i32 => Ok(Self::MemWal),
159 v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
160 v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
161 v if v == Self::Vector as i32 => Ok(Self::Vector),
162 v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
163 v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
164 v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
165 v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
166 v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
167 v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
168 _ => Err(Error::InvalidInput {
169 source: format!("the input value {} is not a valid IndexType", value).into(),
170 location: location!(),
171 }),
172 }
173 }
174}
175
176impl TryFrom<&str> for IndexType {
177 type Error = Error;
178
179 fn try_from(value: &str) -> Result<Self> {
180 match value {
181 "BTree" => Ok(Self::BTree),
182 "Bitmap" => Ok(Self::Bitmap),
183 "LabelList" => Ok(Self::LabelList),
184 "Inverted" => Ok(Self::Inverted),
185 "NGram" => Ok(Self::NGram),
186 "FragmentReuse" => Ok(Self::FragmentReuse),
187 "MemWal" => Ok(Self::MemWal),
188 "ZoneMap" => Ok(Self::ZoneMap),
189 "Vector" => Ok(Self::Vector),
190 "IVF_FLAT" => Ok(Self::IvfFlat),
191 "IVF_SQ" => Ok(Self::IvfSq),
192 "IVF_PQ" => Ok(Self::IvfPq),
193 "IVF_RQ" => Ok(Self::IvfRq),
194 "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
195 "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
196 "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
197 _ => Err(Error::invalid_input(
198 format!("invalid index type: {}", value),
199 location!(),
200 )),
201 }
202 }
203}
204
205impl IndexType {
206 pub fn is_scalar(&self) -> bool {
207 matches!(
208 self,
209 Self::Scalar
210 | Self::BTree
211 | Self::Bitmap
212 | Self::LabelList
213 | Self::Inverted
214 | Self::NGram
215 | Self::ZoneMap
216 | Self::BloomFilter
217 )
218 }
219
220 pub fn is_vector(&self) -> bool {
221 matches!(
222 self,
223 Self::Vector
224 | Self::IvfPq
225 | Self::IvfHnswSq
226 | Self::IvfHnswPq
227 | Self::IvfHnswFlat
228 | Self::IvfFlat
229 | Self::IvfSq
230 | Self::IvfRq
231 )
232 }
233
234 pub fn is_system(&self) -> bool {
235 matches!(self, Self::FragmentReuse | Self::MemWal)
236 }
237
238 pub fn version(&self) -> i32 {
244 match self {
245 Self::Scalar => 0,
246 Self::BTree => 0,
247 Self::Bitmap => 0,
248 Self::LabelList => 0,
249 Self::Inverted => 0,
250 Self::NGram => 0,
251 Self::FragmentReuse => 0,
252 Self::MemWal => 0,
253 Self::ZoneMap => 0,
254 Self::BloomFilter => 0,
255
256 Self::Vector
259 | Self::IvfFlat
260 | Self::IvfSq
261 | Self::IvfPq
262 | Self::IvfHnswSq
263 | Self::IvfHnswPq
264 | Self::IvfHnswFlat
265 | Self::IvfRq => 1,
266 }
267 }
268
269 pub fn target_partition_size(&self) -> usize {
276 match self {
277 Self::Vector => 8192,
278 Self::IvfFlat => 4096,
279 Self::IvfSq => 8192,
280 Self::IvfPq => 8192,
281 Self::IvfHnswFlat => 1 << 20,
282 Self::IvfHnswSq => 1 << 20,
283 Self::IvfHnswPq => 1 << 20,
284 _ => 8192,
285 }
286 }
287}
288
289pub trait IndexParams: Send + Sync {
290 fn as_any(&self) -> &dyn Any;
291
292 fn index_name(&self) -> &str;
293}
294
295#[derive(Serialize, Deserialize, Debug)]
296pub struct IndexMetadata {
297 #[serde(rename = "type")]
298 pub index_type: String,
299 pub distance_type: String,
300}
301
302pub fn is_system_index(index_meta: &lance_table::format::IndexMetadata) -> bool {
303 index_meta.name == FRAG_REUSE_INDEX_NAME || index_meta.name == MEM_WAL_INDEX_NAME
304}
305
306pub fn infer_system_index_type(
307 index_meta: &lance_table::format::IndexMetadata,
308) -> Option<IndexType> {
309 if index_meta.name == FRAG_REUSE_INDEX_NAME {
310 Some(IndexType::FragmentReuse)
311 } else if index_meta.name == MEM_WAL_INDEX_NAME {
312 Some(IndexType::MemWal)
313 } else {
314 None
315 }
316}