lance_index/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Lance secondary index library
5//!
6//! <section class="warning">
//! This is an internal crate used by <a href="https://github.com/lancedb/lance">the lance project</a>.
8//! <br/>
9//! API stability is not guaranteed.
10//! </section>
11
12use std::{any::Any, sync::Arc};
13
14use async_trait::async_trait;
15use deepsize::DeepSizeOf;
16use lance_core::{Error, Result};
17use roaring::RoaringBitmap;
18use serde::{Deserialize, Serialize};
19use snafu::location;
20use std::convert::TryFrom;
21
// Public submodules that make up this crate.
pub mod optimize;
pub mod prefilter;
pub mod scalar;
pub mod traits;
pub mod vector;
// Re-export every public item of `traits` at the crate root.
pub use crate::traits::*;
28
/// The name of the index file.
///
/// NOTE(review): presumably the primary file of an index, as opposed to the
/// auxiliary file named by [`INDEX_AUXILIARY_FILE_NAME`] -- confirm against callers.
pub const INDEX_FILE_NAME: &str = "index.idx";
/// The name of the auxiliary index file.
///
/// This file is used to store additional information about the index, to improve performance.
/// - For an 'IVF_HNSW' index, it stores the partitioned PQ storage.
pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
/// The `lance:index` metadata key.
///
/// NOTE(review): judging by the name, this is the schema-metadata key under which
/// index metadata is stored -- confirm against callers.
pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
36
/// Code included from the build output directory.
///
/// The module body is the file `lance.index.pb.rs` found under the `OUT_DIR`
/// environment variable at compile time. NOTE(review): the file name suggests
/// protobuf-generated definitions -- confirm against the build script.
pub mod pb {
    // Silence clippy's `use_self` lint for everything included into this module.
    #![allow(clippy::use_self)]
    include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
}
41
/// Generic methods common across all types of secondary indices.
///
/// Implementors must be `Send + Sync` and implement [`DeepSizeOf`].
#[async_trait]
pub trait Index: Send + Sync + DeepSizeOf {
    /// Cast to [`Any`].
    fn as_any(&self) -> &dyn Any;

    /// Cast to [`Index`], turning a shared handle into an `Arc<dyn Index>`.
    fn as_index(self: Arc<Self>) -> Arc<dyn Index>;

    /// Cast to [`vector::VectorIndex`].
    ///
    /// # Errors
    ///
    /// The cast is fallible. NOTE(review): presumably it fails when the index is
    /// not a vector index -- confirm against implementors.
    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;

    /// Retrieve index statistics as a JSON value.
    fn statistics(&self) -> Result<serde_json::Value>;

    /// Get the type of the index.
    fn index_type(&self) -> IndexType;

    /// Read through the index and determine which fragment ids are covered by the index.
    ///
    /// This is a relatively slow operation. It is better to use the fragment_bitmap; this
    /// method only exists for cases where the fragment_bitmap has become corrupted or is
    /// missing.
    async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
}
67
/// The type of a secondary index.
///
/// Every variant has an explicit integer discriminant: values below 100 are used
/// for simple (scalar) indices, values from 100 upward for vector indices.
#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
pub enum IndexType {
    // Values below 100 are reserved for simple indices.
    /// Legacy scalar index, alias to [`IndexType::BTree`].
    Scalar = 0,

    /// BTree index.
    BTree = 1,

    /// Bitmap index.
    Bitmap = 2,

    /// LabelList index.
    LabelList = 3,

    /// Inverted index.
    Inverted = 4,

    /// NGram index.
    NGram = 5,

    // Values from 100 upward are used for vector indices.
    /// Legacy vector index, alias to [`IndexType::IvfPq`].
    Vector = 100,
    /// `IVF_FLAT` vector index.
    IvfFlat = 101,
    /// `IVF_SQ` vector index.
    IvfSq = 102,
    /// `IVF_PQ` vector index.
    IvfPq = 103,
    /// `IVF_HNSW_SQ` vector index.
    IvfHnswSq = 104,
    /// `IVF_HNSW_PQ` vector index.
    IvfHnswPq = 105,
}
93
94impl std::fmt::Display for IndexType {
95    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
96        match self {
97            Self::Scalar | Self::BTree => write!(f, "BTree"),
98            Self::Bitmap => write!(f, "Bitmap"),
99            Self::LabelList => write!(f, "LabelList"),
100            Self::Inverted => write!(f, "Inverted"),
101            Self::NGram => write!(f, "NGram"),
102            Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
103            Self::IvfFlat => write!(f, "IVF_FLAT"),
104            Self::IvfSq => write!(f, "IVF_SQ"),
105            Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
106            Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
107        }
108    }
109}
110
111impl TryFrom<i32> for IndexType {
112    type Error = Error;
113
114    fn try_from(value: i32) -> Result<Self> {
115        match value {
116            v if v == Self::Scalar as i32 => Ok(Self::Scalar),
117            v if v == Self::BTree as i32 => Ok(Self::BTree),
118            v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
119            v if v == Self::LabelList as i32 => Ok(Self::LabelList),
120            v if v == Self::NGram as i32 => Ok(Self::NGram),
121            v if v == Self::Inverted as i32 => Ok(Self::Inverted),
122            v if v == Self::Vector as i32 => Ok(Self::Vector),
123            v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
124            v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
125            v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
126            v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
127            v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
128            _ => Err(Error::InvalidInput {
129                source: format!("the input value {} is not a valid IndexType", value).into(),
130                location: location!(),
131            }),
132        }
133    }
134}
135
136impl IndexType {
137    pub fn is_scalar(&self) -> bool {
138        matches!(
139            self,
140            Self::Scalar
141                | Self::BTree
142                | Self::Bitmap
143                | Self::LabelList
144                | Self::Inverted
145                | Self::NGram
146        )
147    }
148
149    pub fn is_vector(&self) -> bool {
150        matches!(
151            self,
152            Self::Vector
153                | Self::IvfPq
154                | Self::IvfHnswSq
155                | Self::IvfHnswPq
156                | Self::IvfFlat
157                | Self::IvfSq
158        )
159    }
160}
161
/// Parameters describing an index.
///
/// NOTE(review): presumably these are the parameters used to build an index --
/// confirm against callers. Implementors must be `Send + Sync`.
pub trait IndexParams: Send + Sync {
    /// Cast to [`Any`].
    fn as_any(&self) -> &dyn Any;

    /// Get the [`IndexType`] associated with these parameters.
    fn index_type(&self) -> IndexType;

    /// Get the index name associated with these parameters.
    fn index_name(&self) -> &str;
}
169
/// Serializable index metadata made of two string fields.
///
/// NOTE(review): presumably stored under [`INDEX_METADATA_SCHEMA_KEY`] -- confirm
/// against callers.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexMetadata {
    /// The index type as a string; (de)serialized under the key `type`.
    #[serde(rename = "type")]
    pub index_type: String,
    /// The distance type as a string.
    pub distance_type: String,
}