1use std::{any::Any, sync::Arc};
13
14use crate::frag_reuse::FRAG_REUSE_INDEX_NAME;
15use crate::mem_wal::MEM_WAL_INDEX_NAME;
16use async_trait::async_trait;
17use deepsize::DeepSizeOf;
18use lance_core::{Error, Result};
19use roaring::RoaringBitmap;
20use serde::{Deserialize, Serialize};
21use snafu::location;
22use std::convert::TryFrom;
23
24pub mod frag_reuse;
25pub mod mem_wal;
26pub mod metrics;
27pub mod optimize;
28pub mod prefilter;
29pub mod registry;
30pub mod scalar;
31pub mod traits;
32pub mod vector;
33
34pub use crate::traits::*;
35
36pub const INDEX_FILE_NAME: &str = "index.idx";
37pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
42pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
43
44pub const VECTOR_INDEX_VERSION: u32 = 1;
46
47pub const MAX_PARTITION_SIZE_FACTOR: usize = 4;
54pub const MIN_PARTITION_SIZE_PERCENT: usize = 25;
55
56pub mod pb {
57 #![allow(clippy::use_self)]
58 include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
59}
60
61pub mod pbold {
62 #![allow(clippy::use_self)]
63 include!(concat!(env!("OUT_DIR"), "/lance.table.rs"));
64}
65
66#[async_trait]
69pub trait Index: Send + Sync + DeepSizeOf {
70 fn as_any(&self) -> &dyn Any;
72
73 fn as_index(self: Arc<Self>) -> Arc<dyn Index>;
75
76 fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;
78
79 fn statistics(&self) -> Result<serde_json::Value>;
81
82 async fn prewarm(&self) -> Result<()>;
86
87 fn index_type(&self) -> IndexType;
89
90 async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
95}
96
97#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
99pub enum IndexType {
100 Scalar = 0, BTree = 1, Bitmap = 2, LabelList = 3, Inverted = 4, NGram = 5, FragmentReuse = 6,
114
115 MemWal = 7,
116
117 ZoneMap = 8, BloomFilter = 9, RTree = 10, Vector = 100, IvfFlat = 101,
127 IvfSq = 102,
128 IvfPq = 103,
129 IvfHnswSq = 104,
130 IvfHnswPq = 105,
131 IvfHnswFlat = 106,
132 IvfRq = 107,
133}
134
135impl std::fmt::Display for IndexType {
136 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
137 match self {
138 Self::Scalar | Self::BTree => write!(f, "BTree"),
139 Self::Bitmap => write!(f, "Bitmap"),
140 Self::LabelList => write!(f, "LabelList"),
141 Self::Inverted => write!(f, "Inverted"),
142 Self::NGram => write!(f, "NGram"),
143 Self::FragmentReuse => write!(f, "FragmentReuse"),
144 Self::MemWal => write!(f, "MemWal"),
145 Self::ZoneMap => write!(f, "ZoneMap"),
146 Self::BloomFilter => write!(f, "BloomFilter"),
147 Self::RTree => write!(f, "RTree"),
148 Self::Vector | Self::IvfPq => write!(f, "IVF_PQ"),
149 Self::IvfFlat => write!(f, "IVF_FLAT"),
150 Self::IvfSq => write!(f, "IVF_SQ"),
151 Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
152 Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
153 Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
154 Self::IvfRq => write!(f, "IVF_RQ"),
155 }
156 }
157}
158
159impl TryFrom<i32> for IndexType {
160 type Error = Error;
161
162 fn try_from(value: i32) -> Result<Self> {
163 match value {
164 v if v == Self::Scalar as i32 => Ok(Self::Scalar),
165 v if v == Self::BTree as i32 => Ok(Self::BTree),
166 v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
167 v if v == Self::LabelList as i32 => Ok(Self::LabelList),
168 v if v == Self::NGram as i32 => Ok(Self::NGram),
169 v if v == Self::Inverted as i32 => Ok(Self::Inverted),
170 v if v == Self::FragmentReuse as i32 => Ok(Self::FragmentReuse),
171 v if v == Self::MemWal as i32 => Ok(Self::MemWal),
172 v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
173 v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
174 v if v == Self::Vector as i32 => Ok(Self::Vector),
175 v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
176 v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
177 v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
178 v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
179 v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
180 v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
181 _ => Err(Error::InvalidInput {
182 source: format!("the input value {} is not a valid IndexType", value).into(),
183 location: location!(),
184 }),
185 }
186 }
187}
188
189impl TryFrom<&str> for IndexType {
190 type Error = Error;
191
192 fn try_from(value: &str) -> Result<Self> {
193 match value {
194 "BTree" => Ok(Self::BTree),
195 "Bitmap" => Ok(Self::Bitmap),
196 "LabelList" => Ok(Self::LabelList),
197 "Inverted" => Ok(Self::Inverted),
198 "NGram" => Ok(Self::NGram),
199 "FragmentReuse" => Ok(Self::FragmentReuse),
200 "MemWal" => Ok(Self::MemWal),
201 "ZoneMap" => Ok(Self::ZoneMap),
202 "Vector" => Ok(Self::Vector),
203 "IVF_FLAT" => Ok(Self::IvfFlat),
204 "IVF_SQ" => Ok(Self::IvfSq),
205 "IVF_PQ" => Ok(Self::IvfPq),
206 "IVF_RQ" => Ok(Self::IvfRq),
207 "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
208 "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
209 "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
210 _ => Err(Error::invalid_input(
211 format!("invalid index type: {}", value),
212 location!(),
213 )),
214 }
215 }
216}
217
218impl IndexType {
219 pub fn is_scalar(&self) -> bool {
220 matches!(
221 self,
222 Self::Scalar
223 | Self::BTree
224 | Self::Bitmap
225 | Self::LabelList
226 | Self::Inverted
227 | Self::NGram
228 | Self::ZoneMap
229 | Self::BloomFilter
230 | Self::RTree,
231 )
232 }
233
234 pub fn is_vector(&self) -> bool {
235 matches!(
236 self,
237 Self::Vector
238 | Self::IvfPq
239 | Self::IvfHnswSq
240 | Self::IvfHnswPq
241 | Self::IvfHnswFlat
242 | Self::IvfFlat
243 | Self::IvfSq
244 | Self::IvfRq
245 )
246 }
247
248 pub fn is_system(&self) -> bool {
249 matches!(self, Self::FragmentReuse | Self::MemWal)
250 }
251
252 pub fn version(&self) -> i32 {
258 match self {
259 Self::Scalar => 0,
260 Self::BTree => 0,
261 Self::Bitmap => 0,
262 Self::LabelList => 0,
263 Self::Inverted => 0,
264 Self::NGram => 0,
265 Self::FragmentReuse => 0,
266 Self::MemWal => 0,
267 Self::ZoneMap => 0,
268 Self::BloomFilter => 0,
269 Self::RTree => 0,
270
271 Self::Vector
274 | Self::IvfFlat
275 | Self::IvfSq
276 | Self::IvfPq
277 | Self::IvfHnswSq
278 | Self::IvfHnswPq
279 | Self::IvfHnswFlat
280 | Self::IvfRq => 1,
281 }
282 }
283
284 pub fn target_partition_size(&self) -> usize {
291 match self {
292 Self::Vector => 8192,
293 Self::IvfFlat => 4096,
294 Self::IvfSq => 8192,
295 Self::IvfPq => 8192,
296 Self::IvfHnswFlat => 1 << 20,
297 Self::IvfHnswSq => 1 << 20,
298 Self::IvfHnswPq => 1 << 20,
299 _ => 8192,
300 }
301 }
302}
303
304pub trait IndexParams: Send + Sync {
305 fn as_any(&self) -> &dyn Any;
306
307 fn index_name(&self) -> &str;
308}
309
310#[derive(Serialize, Deserialize, Debug)]
311pub struct IndexMetadata {
312 #[serde(rename = "type")]
313 pub index_type: String,
314 pub distance_type: String,
315}
316
317pub fn is_system_index(index_meta: &lance_table::format::IndexMetadata) -> bool {
318 index_meta.name == FRAG_REUSE_INDEX_NAME || index_meta.name == MEM_WAL_INDEX_NAME
319}
320
321pub fn infer_system_index_type(
322 index_meta: &lance_table::format::IndexMetadata,
323) -> Option<IndexType> {
324 if index_meta.name == FRAG_REUSE_INDEX_NAME {
325 Some(IndexType::FragmentReuse)
326 } else if index_meta.name == MEM_WAL_INDEX_NAME {
327 Some(IndexType::MemWal)
328 } else {
329 None
330 }
331}