lance_index/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Lance secondary index library
5//!
6//! <section class="warning">
//! This is an internal crate used by <a href="https://github.com/lancedb/lance">the lance project</a>.
8//! <br/>
9//! API stability is not guaranteed.
10//! </section>
11
12use std::{any::Any, sync::Arc};
13
14use async_trait::async_trait;
15use deepsize::DeepSizeOf;
16use lance_core::{Error, Result};
17use roaring::RoaringBitmap;
18use serde::{Deserialize, Serialize};
19use snafu::location;
20use std::convert::TryFrom;
21
22pub mod metrics;
23pub mod optimize;
24pub mod prefilter;
25pub mod scalar;
26pub mod traits;
27pub mod vector;
28pub use crate::traits::*;
29
/// The name of the index file.
///
/// NOTE(review): presumably the primary file of an index, as opposed to the
/// auxiliary file named by [`INDEX_AUXILIARY_FILE_NAME`] -- confirm against the writers.
pub const INDEX_FILE_NAME: &str = "index.idx";
/// The name of the auxiliary index file.
///
/// This file is used to store additional information about the index, to improve performance.
/// - For an 'IVF_HNSW' index, it stores the partitioned PQ storage.
pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
/// The metadata key `lance:index`.
///
/// NOTE(review): judging by the name, this is the schema-metadata key under which
/// index metadata is stored -- confirm against the readers/writers.
pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
37
/// Definitions included from the `lance.index.pb.rs` file found in the build
/// output directory (`OUT_DIR`).
///
/// NOTE(review): the `.pb.rs` suffix suggests protobuf-generated code -- confirm
/// against the crate's build script.
pub mod pb {
    // The lint is silenced for the whole module: the included file comes from
    // `OUT_DIR` rather than the source tree, so presumably it is generated and
    // cannot be hand-edited to satisfy clippy.
    #![allow(clippy::use_self)]
    include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
}
42
/// Generic methods common across all types of secondary indices.
///
/// Implementors must be `Send + Sync` and implement [`DeepSizeOf`].
#[async_trait]
pub trait Index: Send + Sync + DeepSizeOf {
    /// Cast to [Any].
    fn as_any(&self) -> &dyn Any;

    /// Cast to [Index], consuming the `Arc` and returning it as a trait object.
    fn as_index(self: Arc<Self>) -> Arc<dyn Index>;

    /// Cast to [vector::VectorIndex].
    ///
    /// The cast is fallible. NOTE(review): presumably it returns an error when
    /// the index is not a vector index -- confirm against the implementors.
    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;

    /// Retrieve index statistics as a JSON value.
    fn statistics(&self) -> Result<serde_json::Value>;

    /// Get the type of the index.
    fn index_type(&self) -> IndexType;

    /// Read through the index and determine which fragment ids are covered by the index.
    ///
    /// This is a relatively slow operation.  It's better to use the fragment_bitmap.  This
    /// only exists for cases where the fragment_bitmap has become corrupted or is missing.
    async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
}
68
/// Index Type
///
/// Each variant has an explicit integer discriminant; `TryFrom<i32>` converts
/// such an integer back into a variant.
#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
pub enum IndexType {
    // Reserve 0-100 for simple (scalar) indices.
    /// Legacy scalar index, alias to BTree.
    Scalar = 0,

    /// BTree index.
    BTree = 1,

    /// Bitmap index.
    Bitmap = 2,

    /// LabelList index.
    LabelList = 3,

    /// Inverted index.
    Inverted = 4,

    /// NGram index.
    NGram = 5,

    // 100 and up for vector indices.
    /// Legacy vector index, alias to IvfPq.
    Vector = 100,
    /// IVF_FLAT vector index.
    IvfFlat = 101,
    /// IVF_SQ vector index.
    IvfSq = 102,
    /// IVF_PQ vector index.
    IvfPq = 103,
    /// IVF_HNSW_SQ vector index.
    IvfHnswSq = 104,
    /// IVF_HNSW_PQ vector index.
    IvfHnswPq = 105,
}
94
95impl std::fmt::Display for IndexType {
96    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
97        match self {
98            Self::Scalar | Self::BTree => write!(f, "BTree"),
99            Self::Bitmap => write!(f, "Bitmap"),
100            Self::LabelList => write!(f, "LabelList"),
101            Self::Inverted => write!(f, "Inverted"),
102            Self::NGram => write!(f, "NGram"),
103            Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
104            Self::IvfFlat => write!(f, "IVF_FLAT"),
105            Self::IvfSq => write!(f, "IVF_SQ"),
106            Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
107            Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
108        }
109    }
110}
111
112impl TryFrom<i32> for IndexType {
113    type Error = Error;
114
115    fn try_from(value: i32) -> Result<Self> {
116        match value {
117            v if v == Self::Scalar as i32 => Ok(Self::Scalar),
118            v if v == Self::BTree as i32 => Ok(Self::BTree),
119            v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
120            v if v == Self::LabelList as i32 => Ok(Self::LabelList),
121            v if v == Self::NGram as i32 => Ok(Self::NGram),
122            v if v == Self::Inverted as i32 => Ok(Self::Inverted),
123            v if v == Self::Vector as i32 => Ok(Self::Vector),
124            v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
125            v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
126            v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
127            v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
128            v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
129            _ => Err(Error::InvalidInput {
130                source: format!("the input value {} is not a valid IndexType", value).into(),
131                location: location!(),
132            }),
133        }
134    }
135}
136
137impl IndexType {
138    pub fn is_scalar(&self) -> bool {
139        matches!(
140            self,
141            Self::Scalar
142                | Self::BTree
143                | Self::Bitmap
144                | Self::LabelList
145                | Self::Inverted
146                | Self::NGram
147        )
148    }
149
150    pub fn is_vector(&self) -> bool {
151        matches!(
152            self,
153            Self::Vector
154                | Self::IvfPq
155                | Self::IvfHnswSq
156                | Self::IvfHnswPq
157                | Self::IvfFlat
158                | Self::IvfSq
159        )
160    }
161}
162
/// Parameters describing an index.
///
/// NOTE(review): presumably these are the parameters supplied when an index is
/// created -- confirm against the callers. Implementors must be `Send + Sync`.
pub trait IndexParams: Send + Sync {
    /// Cast to [Any].
    fn as_any(&self) -> &dyn Any;

    /// The [`IndexType`] associated with these parameters.
    fn index_type(&self) -> IndexType;

    /// The index name associated with these parameters.
    fn index_name(&self) -> &str;
}
170
/// Serializable metadata record for an index.
///
/// Both fields are plain strings; `index_type` is (de)serialized under the
/// key `type`.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexMetadata {
    /// The index type, stored as a string and serialized as `type`.
    #[serde(rename = "type")]
    pub index_type: String,
    /// The distance type, stored as a string.
    /// NOTE(review): presumably the name of the vector distance metric -- confirm.
    pub distance_type: String,
}