1use std::{any::Any, sync::Arc};
13
14use crate::frag_reuse::FRAG_REUSE_INDEX_NAME;
15use crate::mem_wal::MEM_WAL_INDEX_NAME;
16use async_trait::async_trait;
17use deepsize::DeepSizeOf;
18use lance_core::{Error, Result};
19use roaring::RoaringBitmap;
20use serde::{Deserialize, Serialize};
21use std::convert::TryFrom;
22
23pub mod frag_reuse;
24pub mod mem_wal;
25pub mod metrics;
26pub mod optimize;
27pub mod prefilter;
28pub mod progress;
29pub mod registry;
30pub mod scalar;
31pub mod traits;
32pub mod types;
33pub mod vector;
34
35pub use crate::traits::*;
36pub use crate::types::{IndexSegment, IndexSegmentPlan};
37
/// The file name `"index.idx"`.
///
/// NOTE(review): presumably the name of the main index file inside an index
/// directory; it is not referenced in this file — confirm against callers.
pub const INDEX_FILE_NAME: &str = "index.idx";
/// The file name `"auxiliary.idx"`.
///
/// NOTE(review): presumably a companion file stored next to
/// [`INDEX_FILE_NAME`]; it is not referenced in this file — confirm against callers.
pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
/// The metadata key `"lance:index"`.
///
/// NOTE(review): presumably the schema-metadata key under which index metadata
/// is stored; confirm against the readers/writers that use it.
pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";

/// Version reported by [`IndexType::version`] for every vector index type
/// except [`IndexType::IvfRq`].
pub const VECTOR_INDEX_VERSION: u32 = 1;
/// Version reported by [`IndexType::version`] for [`IndexType::IvfRq`].
pub const IVF_RQ_INDEX_VERSION: u32 = 2;

/// NOTE(review): presumably a multiplier applied to
/// [`IndexType::target_partition_size`] to obtain an upper bound on partition
/// size; not used in this file — confirm against callers.
pub const MAX_PARTITION_SIZE_FACTOR: usize = 4;
/// NOTE(review): presumably a percentage of
/// [`IndexType::target_partition_size`] that acts as a lower bound on partition
/// size; not used in this file — confirm against callers.
pub const MIN_PARTITION_SIZE_PERCENT: usize = 25;
62
/// Code textually included from `$OUT_DIR/lance.index.pb.rs`.
///
/// NOTE(review): the file name suggests build-script-generated protobuf
/// bindings for the `lance.index.pb` package — confirm against `build.rs`.
pub mod pb {
    // The included file is not hand-written here, so the `use_self` lint is
    // silenced for the whole module rather than fixed in the included code.
    #![allow(clippy::use_self)]
    include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
}
67
/// Code textually included from `$OUT_DIR/lance.table.rs`.
///
/// NOTE(review): the module name suggests an older set of generated
/// definitions kept alongside [`pb`] — confirm against `build.rs`.
pub mod pbold {
    // The included file is not hand-written here, so the `use_self` lint is
    // silenced for the whole module rather than fixed in the included code.
    #![allow(clippy::use_self)]
    include!(concat!(env!("OUT_DIR"), "/lance.table.rs"));
}
72
#[async_trait]
/// Common interface implemented by index objects.
///
/// Implementors must be `Send + Sync` and implement [`DeepSizeOf`].
pub trait Index: Send + Sync + DeepSizeOf {
    /// Returns `self` as [`Any`], which allows callers to downcast to the
    /// concrete index type.
    fn as_any(&self) -> &dyn Any;

    /// Converts an `Arc` of the concrete type into an `Arc<dyn Index>`.
    fn as_index(self: Arc<Self>) -> Arc<dyn Index>;

    /// Converts this index into an `Arc<dyn VectorIndex>`.
    ///
    /// # Errors
    ///
    /// The conversion is fallible. NOTE(review): presumably it fails when the
    /// index is not a vector index — confirm against implementors.
    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;

    /// Returns statistics about this index as a JSON value.
    fn statistics(&self) -> Result<serde_json::Value>;

    /// Prewarms the index.
    ///
    /// NOTE(review): presumably loads index data ahead of use so that later
    /// queries avoid the loading cost — confirm against implementors.
    async fn prewarm(&self) -> Result<()>;

    /// Returns the [`IndexType`] of this index.
    fn index_type(&self) -> IndexType;

    /// Calculates which fragments are included in this index, returned as a
    /// [`RoaringBitmap`].
    ///
    /// NOTE(review): the bitmap presumably holds fragment ids — confirm
    /// against implementors.
    async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
}
103
/// The kind of an index.
///
/// Every variant has an explicit integer discriminant. Values `0..=10` are
/// used by the scalar kinds (see [`IndexType::is_scalar`]) and the system
/// kinds (see [`IndexType::is_system`]); values from `100` upwards are used by
/// the vector kinds (see [`IndexType::is_vector`]).
#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
pub enum IndexType {
    /// Generic scalar index; its `Display` output is `"BTree"`.
    Scalar = 0,
    /// Scalar index kind `BTree`.
    BTree = 1,
    /// Scalar index kind `Bitmap`.
    Bitmap = 2,
    /// Scalar index kind `LabelList`.
    LabelList = 3,
    /// Scalar index kind `Inverted`.
    Inverted = 4,
    /// Scalar index kind `NGram`.
    NGram = 5,
    /// System index kind (see [`IndexType::is_system`]).
    FragmentReuse = 6,

    /// System index kind (see [`IndexType::is_system`]).
    MemWal = 7,

    /// Scalar index kind `ZoneMap`.
    ZoneMap = 8,
    /// Scalar index kind `BloomFilter`.
    BloomFilter = 9,
    /// Scalar index kind `RTree`.
    RTree = 10,
    /// Generic vector index; its `Display` output is `"IVF_PQ"`.
    Vector = 100,
    /// Vector index kind displayed as `"IVF_FLAT"`.
    IvfFlat = 101,
    /// Vector index kind displayed as `"IVF_SQ"`.
    IvfSq = 102,
    /// Vector index kind displayed as `"IVF_PQ"`.
    IvfPq = 103,
    /// Vector index kind displayed as `"IVF_HNSW_SQ"`.
    IvfHnswSq = 104,
    /// Vector index kind displayed as `"IVF_HNSW_PQ"`.
    IvfHnswPq = 105,
    /// Vector index kind displayed as `"IVF_HNSW_FLAT"`.
    IvfHnswFlat = 106,
    /// Vector index kind displayed as `"IVF_RQ"`; the only kind whose
    /// [`IndexType::version`] is [`IVF_RQ_INDEX_VERSION`].
    IvfRq = 107,
}
141
142impl std::fmt::Display for IndexType {
143 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
144 match self {
145 Self::Scalar | Self::BTree => write!(f, "BTree"),
146 Self::Bitmap => write!(f, "Bitmap"),
147 Self::LabelList => write!(f, "LabelList"),
148 Self::Inverted => write!(f, "Inverted"),
149 Self::NGram => write!(f, "NGram"),
150 Self::FragmentReuse => write!(f, "FragmentReuse"),
151 Self::MemWal => write!(f, "MemWal"),
152 Self::ZoneMap => write!(f, "ZoneMap"),
153 Self::BloomFilter => write!(f, "BloomFilter"),
154 Self::RTree => write!(f, "RTree"),
155 Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
156 Self::IvfFlat => write!(f, "IVF_FLAT"),
157 Self::IvfSq => write!(f, "IVF_SQ"),
158 Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
159 Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
160 Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
161 Self::IvfRq => write!(f, "IVF_RQ"),
162 }
163 }
164}
165
166impl TryFrom<i32> for IndexType {
167 type Error = Error;
168
169 fn try_from(value: i32) -> Result<Self> {
170 match value {
171 v if v == Self::Scalar as i32 => Ok(Self::Scalar),
172 v if v == Self::BTree as i32 => Ok(Self::BTree),
173 v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
174 v if v == Self::LabelList as i32 => Ok(Self::LabelList),
175 v if v == Self::NGram as i32 => Ok(Self::NGram),
176 v if v == Self::Inverted as i32 => Ok(Self::Inverted),
177 v if v == Self::FragmentReuse as i32 => Ok(Self::FragmentReuse),
178 v if v == Self::MemWal as i32 => Ok(Self::MemWal),
179 v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
180 v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
181 v if v == Self::Vector as i32 => Ok(Self::Vector),
182 v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
183 v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
184 v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
185 v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
186 v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
187 v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
188 v if v == Self::IvfRq as i32 => Ok(Self::IvfRq),
189 _ => Err(Error::invalid_input_source(
190 format!("the input value {} is not a valid IndexType", value).into(),
191 )),
192 }
193 }
194}
195
196impl TryFrom<&str> for IndexType {
197 type Error = Error;
198
199 fn try_from(value: &str) -> Result<Self> {
200 match value {
201 "BTree" | "BTREE" => Ok(Self::BTree),
202 "Bitmap" | "BITMAP" => Ok(Self::Bitmap),
203 "LabelList" | "LABELLIST" => Ok(Self::LabelList),
204 "Inverted" | "INVERTED" => Ok(Self::Inverted),
205 "NGram" | "NGRAM" => Ok(Self::NGram),
206 "ZoneMap" | "ZONEMAP" => Ok(Self::ZoneMap),
207 "Vector" | "VECTOR" => Ok(Self::Vector),
208 "IVF_FLAT" => Ok(Self::IvfFlat),
209 "IVF_SQ" => Ok(Self::IvfSq),
210 "IVF_PQ" => Ok(Self::IvfPq),
211 "IVF_RQ" => Ok(Self::IvfRq),
212 "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
213 "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
214 "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
215 "FragmentReuse" => Ok(Self::FragmentReuse),
216 "MemWal" => Ok(Self::MemWal),
217 _ => Err(Error::invalid_input(format!(
218 "invalid index type: {}",
219 value
220 ))),
221 }
222 }
223}
224
225impl IndexType {
226 pub fn is_scalar(&self) -> bool {
227 matches!(
228 self,
229 Self::Scalar
230 | Self::BTree
231 | Self::Bitmap
232 | Self::LabelList
233 | Self::Inverted
234 | Self::NGram
235 | Self::ZoneMap
236 | Self::BloomFilter
237 | Self::RTree,
238 )
239 }
240
241 pub fn is_vector(&self) -> bool {
242 matches!(
243 self,
244 Self::Vector
245 | Self::IvfPq
246 | Self::IvfHnswSq
247 | Self::IvfHnswPq
248 | Self::IvfHnswFlat
249 | Self::IvfFlat
250 | Self::IvfSq
251 | Self::IvfRq
252 )
253 }
254
255 pub fn is_system(&self) -> bool {
256 matches!(self, Self::FragmentReuse | Self::MemWal)
257 }
258
259 pub fn version(&self) -> i32 {
265 match self {
266 Self::Scalar => 0,
267 Self::BTree => 0,
268 Self::Bitmap => 0,
269 Self::LabelList => 0,
270 Self::Inverted => 0,
271 Self::NGram => 0,
272 Self::FragmentReuse => 0,
273 Self::MemWal => 0,
274 Self::ZoneMap => 0,
275 Self::BloomFilter => 0,
276 Self::RTree => 0,
277
278 Self::Vector
285 | Self::IvfFlat
286 | Self::IvfSq
287 | Self::IvfPq
288 | Self::IvfHnswSq
289 | Self::IvfHnswPq
290 | Self::IvfHnswFlat => VECTOR_INDEX_VERSION as i32,
291 Self::IvfRq => IVF_RQ_INDEX_VERSION as i32,
292 }
293 }
294
295 pub fn target_partition_size(&self) -> usize {
302 match self {
303 Self::Vector => 8192,
304 Self::IvfFlat => 4096,
305 Self::IvfSq => 8192,
306 Self::IvfPq => 8192,
307 Self::IvfHnswFlat => 1 << 20,
308 Self::IvfHnswSq => 1 << 20,
309 Self::IvfHnswPq => 1 << 20,
310 _ => 8192,
311 }
312 }
313
314 pub fn max_vector_version() -> u32 {
316 [
317 Self::Vector,
318 Self::IvfFlat,
319 Self::IvfSq,
320 Self::IvfPq,
321 Self::IvfHnswSq,
322 Self::IvfHnswPq,
323 Self::IvfHnswFlat,
324 Self::IvfRq,
325 ]
326 .into_iter()
327 .map(|index_type| index_type.version() as u32)
328 .max()
329 .unwrap_or(VECTOR_INDEX_VERSION)
330 }
331}
332
/// Parameters for an index.
///
/// Implementors must be `Send + Sync`.
/// NOTE(review): presumably passed in when creating an index — confirm against callers.
pub trait IndexParams: Send + Sync {
    /// Returns `self` as [`Any`], which allows callers to downcast to the
    /// concrete parameter type.
    fn as_any(&self) -> &dyn Any;

    /// Returns the index name carried by these parameters.
    fn index_name(&self) -> &str;
}
338
/// Serializable index metadata with two string fields.
///
/// NOTE(review): presumably the value stored under
/// [`INDEX_METADATA_SCHEMA_KEY`] — confirm against the readers/writers.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexMetadata {
    /// The index type as a string; serialized under the key `"type"`.
    #[serde(rename = "type")]
    pub index_type: String,
    /// The distance type as a string.
    /// NOTE(review): presumably the name of a vector distance metric — confirm.
    pub distance_type: String,
}
345
346pub fn is_system_index(index_meta: &lance_table::format::IndexMetadata) -> bool {
347 index_meta.name == FRAG_REUSE_INDEX_NAME || index_meta.name == MEM_WAL_INDEX_NAME
348}
349
350pub fn infer_system_index_type(
351 index_meta: &lance_table::format::IndexMetadata,
352) -> Option<IndexType> {
353 if index_meta.name == FRAG_REUSE_INDEX_NAME {
354 Some(IndexType::FragmentReuse)
355 } else if index_meta.name == MEM_WAL_INDEX_NAME {
356 Some(IndexType::MemWal)
357 } else {
358 None
359 }
360}
361
#[cfg(test)]
mod tests {
    use super::*;

    /// `IvfRq` must report its own version constant, strictly greater than
    /// the version reported by `IvfPq`.
    #[test]
    fn test_ivf_rq_has_dedicated_index_version() {
        let rq_version = IndexType::IvfRq.version();
        let pq_version = IndexType::IvfPq.version();
        assert!(rq_version > pq_version);
        assert_eq!(rq_version as u32, IVF_RQ_INDEX_VERSION);
    }

    /// The maximum over all vector kinds must equal the `IvfRq` version.
    #[test]
    fn test_max_vector_version_tracks_highest_supported() {
        let highest = IndexType::max_vector_version();
        assert_eq!(highest, IVF_RQ_INDEX_VERSION);
    }
}