lance_index/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Lance secondary index library
5//!
6//! <section class="warning">
//! This is an internal crate used by <a href="https://github.com/lancedb/lance">the lance project</a>.
8//! <br/>
9//! API stability is not guaranteed.
10//! </section>
11
12use std::{any::Any, sync::Arc};
13
14use async_trait::async_trait;
15use deepsize::DeepSizeOf;
16use lance_core::{Error, Result};
17use roaring::RoaringBitmap;
18use serde::{Deserialize, Serialize};
19use snafu::location;
20use std::convert::TryFrom;
21
22pub mod metrics;
23pub mod optimize;
24pub mod prefilter;
25pub mod scalar;
26pub mod traits;
27pub mod vector;
28pub use crate::traits::*;
29
/// The name of the index file.
pub const INDEX_FILE_NAME: &str = "index.idx";
/// The name of the auxiliary index file.
///
/// This file is used to store additional information about the index, to improve performance.
/// - For 'IVF_HNSW' index, it stores the partitioned PQ Storage.
pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
/// Metadata key `lance:index`.
///
/// NOTE(review): presumably the schema-metadata key under which index metadata is stored;
/// confirm against the readers/writers that use this constant.
pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
37
/// Generated definitions, textually included from `lance.index.pb.rs` in the build's `OUT_DIR`.
///
/// NOTE(review): the `.pb.rs` suffix suggests protobuf code emitted by the build script;
/// confirm against `build.rs`.
pub mod pb {
    // The included file is generated, so `clippy::use_self` is silenced for the whole module
    // rather than fixed in place.
    #![allow(clippy::use_self)]
    include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
}
42
/// Generic methods common across all types of secondary indices.
///
/// Implementors must be `Send + Sync` and implement [`DeepSizeOf`].
#[async_trait]
pub trait Index: Send + Sync + DeepSizeOf {
    /// Cast to [`Any`].
    fn as_any(&self) -> &dyn Any;

    /// Cast to a type-erased [`Index`] trait object.
    fn as_index(self: Arc<Self>) -> Arc<dyn Index>;

    /// Cast to [`vector::VectorIndex`].
    ///
    /// NOTE(review): the `Result` return presumably reports an error when the index is not a
    /// vector index; confirm against the implementors.
    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;

    /// Retrieve index statistics as a JSON value.
    fn statistics(&self) -> Result<serde_json::Value>;

    /// Prewarm the index.
    ///
    /// This will load the index into memory and cache it.
    async fn prewarm(&self) -> Result<()>;

    /// Get the type of the index.
    fn index_type(&self) -> IndexType;

    /// Read through the index and determine which fragment ids are covered by the index.
    ///
    /// This is a slow operation. It's better to use the fragment_bitmap. This
    /// only exists for cases where the fragment_bitmap has become corrupted or missing.
    async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
}
73
/// Index Type
///
/// Each variant has an explicit integer discriminant: values below 100 are used for
/// simple (scalar) indices, values from 100 up for vector indices.
#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
pub enum IndexType {
    // Preserve 0-100 for simple indices.
    /// Legacy scalar index, alias to BTree.
    Scalar = 0,

    /// BTree index.
    BTree = 1,

    /// Bitmap index.
    Bitmap = 2,

    /// LabelList index.
    LabelList = 3,

    /// Inverted index.
    Inverted = 4,

    /// NGram index.
    NGram = 5,

    // 100+ and up for vector index.
    /// Legacy vector index, alias to IvfPq.
    Vector = 100,
    /// IVF_FLAT vector index.
    IvfFlat = 101,
    /// IVF_SQ vector index.
    IvfSq = 102,
    /// IVF_PQ vector index.
    IvfPq = 103,
    /// IVF_HNSW_SQ vector index.
    IvfHnswSq = 104,
    /// IVF_HNSW_PQ vector index.
    IvfHnswPq = 105,
}
99
100impl std::fmt::Display for IndexType {
101    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
102        match self {
103            Self::Scalar | Self::BTree => write!(f, "BTree"),
104            Self::Bitmap => write!(f, "Bitmap"),
105            Self::LabelList => write!(f, "LabelList"),
106            Self::Inverted => write!(f, "Inverted"),
107            Self::NGram => write!(f, "NGram"),
108            Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
109            Self::IvfFlat => write!(f, "IVF_FLAT"),
110            Self::IvfSq => write!(f, "IVF_SQ"),
111            Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
112            Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
113        }
114    }
115}
116
117impl TryFrom<i32> for IndexType {
118    type Error = Error;
119
120    fn try_from(value: i32) -> Result<Self> {
121        match value {
122            v if v == Self::Scalar as i32 => Ok(Self::Scalar),
123            v if v == Self::BTree as i32 => Ok(Self::BTree),
124            v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
125            v if v == Self::LabelList as i32 => Ok(Self::LabelList),
126            v if v == Self::NGram as i32 => Ok(Self::NGram),
127            v if v == Self::Inverted as i32 => Ok(Self::Inverted),
128            v if v == Self::Vector as i32 => Ok(Self::Vector),
129            v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
130            v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
131            v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
132            v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
133            v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
134            _ => Err(Error::InvalidInput {
135                source: format!("the input value {} is not a valid IndexType", value).into(),
136                location: location!(),
137            }),
138        }
139    }
140}
141
142impl IndexType {
143    pub fn is_scalar(&self) -> bool {
144        matches!(
145            self,
146            Self::Scalar
147                | Self::BTree
148                | Self::Bitmap
149                | Self::LabelList
150                | Self::Inverted
151                | Self::NGram
152        )
153    }
154
155    pub fn is_vector(&self) -> bool {
156        matches!(
157            self,
158            Self::Vector
159                | Self::IvfPq
160                | Self::IvfHnswSq
161                | Self::IvfHnswPq
162                | Self::IvfFlat
163                | Self::IvfSq
164        )
165    }
166}
167
/// Parameters describing an index.
///
/// NOTE(review): presumably implemented by the per-index-type parameter structs passed when
/// creating an index; confirm against the implementors.
pub trait IndexParams: Send + Sync {
    /// Cast to [`Any`].
    fn as_any(&self) -> &dyn Any;

    /// The [`IndexType`] these parameters are for.
    fn index_type(&self) -> IndexType;

    /// The name of the index.
    fn index_name(&self) -> &str;
}
175
/// Serializable index metadata.
///
/// NOTE(review): presumably the value stored under `INDEX_METADATA_SCHEMA_KEY`; confirm
/// against the code that writes it.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexMetadata {
    /// The index type, as a string. Serialized under the field name `type`.
    #[serde(rename = "type")]
    pub index_type: String,
    /// The distance type, as a string.
    pub distance_type: String,
}