lance_index/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Lance secondary index library
5//!
6//! <section class="warning">
//! This is an internal crate used by <a href="https://github.com/lancedb/lance">the lance project</a>.
8//! <br/>
9//! API stability is not guaranteed.
10//! </section>
11
12use std::{any::Any, sync::Arc};
13
14use async_trait::async_trait;
15use deepsize::DeepSizeOf;
16use lance_core::{Error, Result};
17use roaring::RoaringBitmap;
18use serde::{Deserialize, Serialize};
19use snafu::location;
20use std::convert::TryFrom;
21
22pub mod optimize;
23pub mod prefilter;
24pub mod scalar;
25pub mod traits;
26pub mod vector;
27pub use crate::traits::*;
28
/// File name `index.idx`.
///
/// NOTE(review): judging by the name this is the main index file; confirm against callers.
pub const INDEX_FILE_NAME: &str = "index.idx";

/// File name of the auxiliary index file.
///
/// The auxiliary file holds extra information about the index that helps performance.
/// - For an 'IVF_HNSW' index, it stores the partitioned PQ Storage.
pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";

/// Metadata key `lance:index`.
///
/// NOTE(review): judging by the name this keys index metadata inside schema metadata;
/// confirm against callers.
pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
36
37pub mod pb {
38    #![allow(clippy::use_self)]
39    include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
40}
41
42/// Generic methods common across all types of secondary indices
43///
44#[async_trait]
45pub trait Index: Send + Sync + DeepSizeOf {
46    /// Cast to [Any].
47    fn as_any(&self) -> &dyn Any;
48
49    /// Cast to [Index]
50    fn as_index(self: Arc<Self>) -> Arc<dyn Index>;
51
52    /// Cast to [vector::VectorIndex]
53    fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;
54
55    /// Retrieve index statistics as a JSON Value
56    fn statistics(&self) -> Result<serde_json::Value>;
57
58    /// Get the type of the index
59    fn index_type(&self) -> IndexType;
60
61    /// Read through the index and determine which fragment ids are covered by the index
62    ///
63    /// This is a kind of slow operation.  It's better to use the fragment_bitmap.  This
64    /// only exists for cases where the fragment_bitmap has become corrupted or missing.
65    async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
66}
67
68/// Index Type
69#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
70pub enum IndexType {
71    // Preserve 0-100 for simple indices.
72    Scalar = 0, // Legacy scalar index, alias to BTree
73
74    BTree = 1, // BTree
75
76    Bitmap = 2, // Bitmap
77
78    LabelList = 3, // LabelList
79
80    Inverted = 4, // Inverted
81
82    // 100+ and up for vector index.
83    /// Flat vector index.
84    Vector = 100, // Legacy vector index, alias to IvfPq
85    IvfFlat = 101,
86    IvfSq = 102,
87    IvfPq = 103,
88    IvfHnswSq = 104,
89    IvfHnswPq = 105,
90}
91
92impl std::fmt::Display for IndexType {
93    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
94        match self {
95            Self::Scalar | Self::BTree => write!(f, "BTree"),
96            Self::Bitmap => write!(f, "Bitmap"),
97            Self::LabelList => write!(f, "LabelList"),
98            Self::Inverted => write!(f, "Inverted"),
99            Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
100            Self::IvfFlat => write!(f, "IVF_FLAT"),
101            Self::IvfSq => write!(f, "IVF_SQ"),
102            Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
103            Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
104        }
105    }
106}
107
108impl TryFrom<i32> for IndexType {
109    type Error = Error;
110
111    fn try_from(value: i32) -> Result<Self> {
112        match value {
113            v if v == Self::Scalar as i32 => Ok(Self::Scalar),
114            v if v == Self::BTree as i32 => Ok(Self::BTree),
115            v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
116            v if v == Self::LabelList as i32 => Ok(Self::LabelList),
117            v if v == Self::Inverted as i32 => Ok(Self::Inverted),
118            v if v == Self::Vector as i32 => Ok(Self::Vector),
119            v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
120            v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
121            v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
122            v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
123            v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
124            _ => Err(Error::InvalidInput {
125                source: format!("the input value {} is not a valid IndexType", value).into(),
126                location: location!(),
127            }),
128        }
129    }
130}
131
132impl IndexType {
133    pub fn is_scalar(&self) -> bool {
134        matches!(
135            self,
136            Self::Scalar | Self::BTree | Self::Bitmap | Self::LabelList | Self::Inverted
137        )
138    }
139
140    pub fn is_vector(&self) -> bool {
141        matches!(
142            self,
143            Self::Vector
144                | Self::IvfPq
145                | Self::IvfHnswSq
146                | Self::IvfHnswPq
147                | Self::IvfFlat
148                | Self::IvfSq
149        )
150    }
151}
152
153pub trait IndexParams: Send + Sync {
154    fn as_any(&self) -> &dyn Any;
155
156    fn index_type(&self) -> IndexType;
157
158    fn index_name(&self) -> &str;
159}
160
161#[derive(Serialize, Deserialize, Debug)]
162pub struct IndexMetadata {
163    #[serde(rename = "type")]
164    pub index_type: String,
165    pub distance_type: String,
166}