1use std::{any::Any, sync::Arc};
13
14use crate::frag_reuse::FRAG_REUSE_INDEX_NAME;
15use crate::mem_wal::MEM_WAL_INDEX_NAME;
16use async_trait::async_trait;
17use deepsize::DeepSizeOf;
18use lance_core::{Error, Result};
19use roaring::RoaringBitmap;
20use serde::{Deserialize, Serialize};
21use snafu::location;
22use std::convert::TryFrom;
23
24pub mod frag_reuse;
25pub mod mem_wal;
26pub mod metrics;
27pub mod optimize;
28pub mod prefilter;
29pub mod scalar;
30pub mod traits;
31pub mod vector;
32
33pub use crate::traits::*;
34
35pub const INDEX_FILE_NAME: &str = "index.idx";
36pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
41pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
42
43pub const VECTOR_INDEX_VERSION: u32 = 1;
45
46pub mod pb {
47 #![allow(clippy::use_self)]
48 include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
49}
50
51#[async_trait]
54pub trait Index: Send + Sync + DeepSizeOf {
55 fn as_any(&self) -> &dyn Any;
57
58 fn as_index(self: Arc<Self>) -> Arc<dyn Index>;
60
61 fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;
63
64 fn statistics(&self) -> Result<serde_json::Value>;
66
67 async fn prewarm(&self) -> Result<()>;
71
72 fn index_type(&self) -> IndexType;
74
75 async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
80}
81
82#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
84pub enum IndexType {
85 Scalar = 0, BTree = 1, Bitmap = 2, LabelList = 3, Inverted = 4, NGram = 5, FragmentReuse = 6,
99
100 MemWal = 7,
101
102 ZoneMap = 8, BloomFilter = 9, Vector = 100, IvfFlat = 101,
110 IvfSq = 102,
111 IvfPq = 103,
112 IvfHnswSq = 104,
113 IvfHnswPq = 105,
114 IvfHnswFlat = 106,
115}
116
117impl std::fmt::Display for IndexType {
118 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
119 match self {
120 Self::Scalar | Self::BTree => write!(f, "BTree"),
121 Self::Bitmap => write!(f, "Bitmap"),
122 Self::LabelList => write!(f, "LabelList"),
123 Self::Inverted => write!(f, "Inverted"),
124 Self::NGram => write!(f, "NGram"),
125 Self::FragmentReuse => write!(f, "FragmentReuse"),
126 Self::MemWal => write!(f, "MemWal"),
127 Self::ZoneMap => write!(f, "ZoneMap"),
128 Self::BloomFilter => write!(f, "BloomFilter"),
129 Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
130 Self::IvfFlat => write!(f, "IVF_FLAT"),
131 Self::IvfSq => write!(f, "IVF_SQ"),
132 Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
133 Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
134 Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
135 }
136 }
137}
138
139impl TryFrom<i32> for IndexType {
140 type Error = Error;
141
142 fn try_from(value: i32) -> Result<Self> {
143 match value {
144 v if v == Self::Scalar as i32 => Ok(Self::Scalar),
145 v if v == Self::BTree as i32 => Ok(Self::BTree),
146 v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
147 v if v == Self::LabelList as i32 => Ok(Self::LabelList),
148 v if v == Self::NGram as i32 => Ok(Self::NGram),
149 v if v == Self::Inverted as i32 => Ok(Self::Inverted),
150 v if v == Self::FragmentReuse as i32 => Ok(Self::FragmentReuse),
151 v if v == Self::MemWal as i32 => Ok(Self::MemWal),
152 v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
153 v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
154 v if v == Self::Vector as i32 => Ok(Self::Vector),
155 v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
156 v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
157 v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
158 v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
159 v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
160 v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
161 _ => Err(Error::InvalidInput {
162 source: format!("the input value {} is not a valid IndexType", value).into(),
163 location: location!(),
164 }),
165 }
166 }
167}
168
169impl TryFrom<&str> for IndexType {
170 type Error = Error;
171
172 fn try_from(value: &str) -> Result<Self> {
173 match value {
174 "BTree" => Ok(Self::BTree),
175 "Bitmap" => Ok(Self::Bitmap),
176 "LabelList" => Ok(Self::LabelList),
177 "Inverted" => Ok(Self::Inverted),
178 "NGram" => Ok(Self::NGram),
179 "FragmentReuse" => Ok(Self::FragmentReuse),
180 "MemWal" => Ok(Self::MemWal),
181 "ZoneMap" => Ok(Self::ZoneMap),
182 "Vector" => Ok(Self::Vector),
183 "IVF_FLAT" => Ok(Self::IvfFlat),
184 "IVF_SQ" => Ok(Self::IvfSq),
185 "IVF_PQ" => Ok(Self::IvfPq),
186 "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
187 "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
188 "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
189 _ => Err(Error::invalid_input(
190 format!("invalid index type: {}", value),
191 location!(),
192 )),
193 }
194 }
195}
196
197impl IndexType {
198 pub fn is_scalar(&self) -> bool {
199 matches!(
200 self,
201 Self::Scalar
202 | Self::BTree
203 | Self::Bitmap
204 | Self::LabelList
205 | Self::Inverted
206 | Self::NGram
207 | Self::ZoneMap
208 | Self::BloomFilter
209 )
210 }
211
212 pub fn is_vector(&self) -> bool {
213 matches!(
214 self,
215 Self::Vector
216 | Self::IvfPq
217 | Self::IvfHnswSq
218 | Self::IvfHnswPq
219 | Self::IvfHnswFlat
220 | Self::IvfFlat
221 | Self::IvfSq
222 )
223 }
224
225 pub fn is_system(&self) -> bool {
226 matches!(self, Self::FragmentReuse | Self::MemWal)
227 }
228
229 pub fn version(&self) -> i32 {
235 match self {
236 Self::Scalar => 0,
237 Self::BTree => 0,
238 Self::Bitmap => 0,
239 Self::LabelList => 0,
240 Self::Inverted => 0,
241 Self::NGram => 0,
242 Self::FragmentReuse => 0,
243 Self::MemWal => 0,
244 Self::ZoneMap => 0,
245 Self::BloomFilter => 0,
246
247 Self::Vector
250 | Self::IvfFlat
251 | Self::IvfSq
252 | Self::IvfPq
253 | Self::IvfHnswSq
254 | Self::IvfHnswPq
255 | Self::IvfHnswFlat => 1,
256 }
257 }
258
259 pub fn target_partition_size(&self) -> usize {
266 match self {
267 Self::Vector => 8192,
268 Self::IvfFlat => 4096,
269 Self::IvfSq => 8192,
270 Self::IvfPq => 8192,
271 Self::IvfHnswFlat => 1 << 20,
272 Self::IvfHnswSq => 1 << 20,
273 Self::IvfHnswPq => 1 << 20,
274 _ => 8192,
275 }
276 }
277}
278
279pub trait IndexParams: Send + Sync {
280 fn as_any(&self) -> &dyn Any;
281
282 fn index_name(&self) -> &str;
283}
284
285#[derive(Serialize, Deserialize, Debug)]
286pub struct IndexMetadata {
287 #[serde(rename = "type")]
288 pub index_type: String,
289 pub distance_type: String,
290}
291
292pub fn is_system_index(index_meta: &lance_table::format::Index) -> bool {
293 index_meta.name == FRAG_REUSE_INDEX_NAME || index_meta.name == MEM_WAL_INDEX_NAME
294}
295
296pub fn infer_system_index_type(index_meta: &lance_table::format::Index) -> Option<IndexType> {
297 if index_meta.name == FRAG_REUSE_INDEX_NAME {
298 Some(IndexType::FragmentReuse)
299 } else if index_meta.name == MEM_WAL_INDEX_NAME {
300 Some(IndexType::MemWal)
301 } else {
302 None
303 }
304}