use std::{any::Any, sync::Arc};
use crate::frag_reuse::FRAG_REUSE_INDEX_NAME;
use crate::mem_wal::MEM_WAL_INDEX_NAME;
use async_trait::async_trait;
use deepsize::DeepSizeOf;
use lance_core::{Error, Result};
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;
// Public submodules declared at the crate root.
pub mod frag_reuse;
pub mod mem_wal;
pub mod metrics;
pub mod optimize;
pub mod prefilter;
pub mod progress;
pub mod registry;
pub mod scalar;
pub mod traits;
pub mod vector;
// Re-export every public item of `traits` so callers can import them from the crate root.
pub use crate::traits::*;
/// The file name `"index.idx"`.
/// NOTE(review): presumably the main file of a stored index — confirm against callers.
pub const INDEX_FILE_NAME: &str = "index.idx";
/// The file name `"auxiliary.idx"`.
/// NOTE(review): presumably a companion file stored next to [`INDEX_FILE_NAME`] — confirm against callers.
pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
/// The metadata key `"lance:index"`.
/// NOTE(review): presumably the schema-metadata entry that holds index metadata — TODO confirm.
pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";
/// Version returned by [`IndexType::version`] for every vector index type except
/// [`IndexType::IvfRq`].
pub const VECTOR_INDEX_VERSION: u32 = 1;
/// Version returned by [`IndexType::version`] for [`IndexType::IvfRq`].
pub const IVF_RQ_INDEX_VERSION: u32 = 2;
/// Not referenced in this file.
/// NOTE(review): presumably a multiplier of `IndexType::target_partition_size` above which a
/// partition counts as too large — TODO confirm at the use sites.
pub const MAX_PARTITION_SIZE_FACTOR: usize = 4;
/// Not referenced in this file.
/// NOTE(review): presumably a percentage of `IndexType::target_partition_size` below which a
/// partition counts as too small — TODO confirm at the use sites.
pub const MIN_PARTITION_SIZE_PERCENT: usize = 25;
/// Items textually included from the file `lance.index.pb.rs` found under the build-time
/// `OUT_DIR` directory.
/// NOTE(review): presumably protobuf bindings emitted by a build script — confirm.
pub mod pb {
// Silence clippy's `use_self` lint for everything in the included file; that file is not
// hand-written in this module, so it cannot be adjusted here.
#![allow(clippy::use_self)]
include!(concat!(env!("OUT_DIR"), "/lance.index.pb.rs"));
}
/// Items textually included from the file `lance.table.rs` found under the build-time
/// `OUT_DIR` directory.
/// NOTE(review): the name suggests older/legacy generated definitions kept alongside `pb` —
/// confirm before relying on that.
pub mod pbold {
// Silence clippy's `use_self` lint for everything in the included file; that file is not
// hand-written in this module, so it cannot be adjusted here.
#![allow(clippy::use_self)]
include!(concat!(env!("OUT_DIR"), "/lance.table.rs"));
}
/// Common interface of an index.
///
/// Implementors must be `Send + Sync` and implement `DeepSizeOf`. The `async_trait`
/// attribute is what allows the `async fn` methods below to appear in the trait.
#[async_trait]
pub trait Index: Send + Sync + DeepSizeOf {
/// Returns `self` as `&dyn Any` so callers can downcast to the concrete index type.
fn as_any(&self) -> &dyn Any;
/// Converts an `Arc` of the concrete type into an `Arc<dyn Index>` trait object.
fn as_index(self: Arc<Self>) -> Arc<dyn Index>;
/// Converts an `Arc` of the concrete type into an `Arc<dyn vector::VectorIndex>`.
///
/// Fallible. NOTE(review): presumably returns an error for indices that are not vector
/// indices — confirm with the implementors.
fn as_vector_index(self: Arc<Self>) -> Result<Arc<dyn vector::VectorIndex>>;
/// Returns statistics about the index as a JSON value.
fn statistics(&self) -> Result<serde_json::Value>;
/// Asynchronously prewarms the index.
///
/// NOTE(review): presumably loads index data ahead of use so later queries are faster —
/// the exact effect is implementation-defined; confirm with the implementors.
async fn prewarm(&self) -> Result<()>;
/// Returns the [`IndexType`] of this index.
fn index_type(&self) -> IndexType;
/// Asynchronously computes a bitmap of included fragments.
///
/// NOTE(review): presumably the set bits are the ids of the fragments covered by this
/// index — confirm with the implementors.
async fn calculate_included_frags(&self) -> Result<RoaringBitmap>;
}
/// The kinds of index known to this crate, each with a fixed integer discriminant.
///
/// Discriminants 0 through 10 are the scalar and system kinds; discriminants from 100
/// upward are the vector kinds (see `is_scalar`, `is_system` and `is_vector`).
#[derive(Debug, PartialEq, Eq, Copy, Hash, Clone, DeepSizeOf)]
pub enum IndexType {
// Scalar and system kinds: 0-10.
// `Scalar` is displayed as "BTree" by the `Display` impl.
Scalar = 0,
BTree = 1,
Bitmap = 2,
LabelList = 3,
Inverted = 4,
NGram = 5,
// `FragmentReuse` and `MemWal` are the two kinds reported by `is_system`.
FragmentReuse = 6,
MemWal = 7,
ZoneMap = 8,
BloomFilter = 9,
RTree = 10,
// Vector kinds: 100 and up. `Vector` is displayed as "IVF_PQ" by the `Display` impl.
Vector = 100, IvfFlat = 101,
IvfSq = 102,
IvfPq = 103,
IvfHnswSq = 104,
IvfHnswPq = 105,
IvfHnswFlat = 106,
IvfRq = 107,
}
impl std::fmt::Display for IndexType {
    /// Writes the display name of the index type.
    ///
    /// Two variants borrow the name of a more specific one: `Scalar` is written as
    /// `"BTree"` and `Vector` is written as `"IVF_PQ"`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Pick the static label first, then emit it with a single write.
        let label = match self {
            // Scalar kinds.
            Self::Scalar | Self::BTree => "BTree",
            Self::Bitmap => "Bitmap",
            Self::LabelList => "LabelList",
            Self::Inverted => "Inverted",
            Self::NGram => "NGram",
            Self::ZoneMap => "ZoneMap",
            Self::BloomFilter => "BloomFilter",
            Self::RTree => "RTree",
            // System kinds.
            Self::FragmentReuse => "FragmentReuse",
            Self::MemWal => "MemWal",
            // Vector kinds.
            Self::Vector | Self::IvfPq => "IVF_PQ",
            Self::IvfFlat => "IVF_FLAT",
            Self::IvfSq => "IVF_SQ",
            Self::IvfRq => "IVF_RQ",
            Self::IvfHnswFlat => "IVF_HNSW_FLAT",
            Self::IvfHnswSq => "IVF_HNSW_SQ",
            Self::IvfHnswPq => "IVF_HNSW_PQ",
        };
        f.write_str(label)
    }
}
impl TryFrom<i32> for IndexType {
    type Error = Error;

    /// Converts a raw integer discriminant back into an [`IndexType`].
    ///
    /// Every variant of the enum is accepted, so `IndexType::try_from(t as i32)` yields
    /// `t` again for any `t`.
    ///
    /// # Errors
    ///
    /// Returns an invalid-input error when `value` is not the discriminant of any variant.
    fn try_from(value: i32) -> Result<Self> {
        match value {
            // Scalar and system kinds, in discriminant order (0-10).
            v if v == Self::Scalar as i32 => Ok(Self::Scalar),
            v if v == Self::BTree as i32 => Ok(Self::BTree),
            v if v == Self::Bitmap as i32 => Ok(Self::Bitmap),
            v if v == Self::LabelList as i32 => Ok(Self::LabelList),
            v if v == Self::Inverted as i32 => Ok(Self::Inverted),
            v if v == Self::NGram as i32 => Ok(Self::NGram),
            v if v == Self::FragmentReuse as i32 => Ok(Self::FragmentReuse),
            v if v == Self::MemWal as i32 => Ok(Self::MemWal),
            v if v == Self::ZoneMap as i32 => Ok(Self::ZoneMap),
            v if v == Self::BloomFilter as i32 => Ok(Self::BloomFilter),
            // Previously missing: without this arm the discriminant of `RTree` was
            // rejected even though it is a valid variant.
            v if v == Self::RTree as i32 => Ok(Self::RTree),
            // Vector kinds, in discriminant order (100 and up).
            v if v == Self::Vector as i32 => Ok(Self::Vector),
            v if v == Self::IvfFlat as i32 => Ok(Self::IvfFlat),
            v if v == Self::IvfSq as i32 => Ok(Self::IvfSq),
            v if v == Self::IvfPq as i32 => Ok(Self::IvfPq),
            v if v == Self::IvfHnswSq as i32 => Ok(Self::IvfHnswSq),
            v if v == Self::IvfHnswPq as i32 => Ok(Self::IvfHnswPq),
            v if v == Self::IvfHnswFlat as i32 => Ok(Self::IvfHnswFlat),
            v if v == Self::IvfRq as i32 => Ok(Self::IvfRq),
            _ => Err(Error::invalid_input_source(
                format!("the input value {} is not a valid IndexType", value).into(),
            )),
        }
    }
}
impl TryFrom<&str> for IndexType {
    type Error = Error;

    /// Parses an [`IndexType`] from its name.
    ///
    /// Matching is exact: only the spellings listed in the match below are accepted, and
    /// no general case folding is performed. Every string produced by the `Display` impl
    /// of `IndexType` is accepted, including `"BloomFilter"` and `"RTree"`.
    ///
    /// # Errors
    ///
    /// Returns an invalid-input error for any other string.
    fn try_from(value: &str) -> Result<Self> {
        match value {
            // Scalar kinds: display spelling plus an all-upper-case alias.
            "BTree" | "BTREE" => Ok(Self::BTree),
            "Bitmap" | "BITMAP" => Ok(Self::Bitmap),
            "LabelList" | "LABELLIST" => Ok(Self::LabelList),
            "Inverted" | "INVERTED" => Ok(Self::Inverted),
            "NGram" | "NGRAM" => Ok(Self::NGram),
            "ZoneMap" | "ZONEMAP" => Ok(Self::ZoneMap),
            // Previously missing: `Display` emits these two names, so they must parse
            // for a display/parse round trip to succeed.
            "BloomFilter" | "BLOOMFILTER" => Ok(Self::BloomFilter),
            "RTree" | "RTREE" => Ok(Self::RTree),
            // Vector kinds.
            "Vector" | "VECTOR" => Ok(Self::Vector),
            "IVF_FLAT" => Ok(Self::IvfFlat),
            "IVF_SQ" => Ok(Self::IvfSq),
            "IVF_PQ" => Ok(Self::IvfPq),
            "IVF_RQ" => Ok(Self::IvfRq),
            "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
            "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
            "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
            // System kinds.
            "FragmentReuse" => Ok(Self::FragmentReuse),
            "MemWal" => Ok(Self::MemWal),
            _ => Err(Error::invalid_input(format!(
                "invalid index type: {}",
                value
            ))),
        }
    }
}
impl IndexType {
    /// Reports whether this is one of the scalar index types.
    ///
    /// The match is exhaustive so that a newly added variant has to be classified here
    /// before the crate compiles.
    pub fn is_scalar(&self) -> bool {
        match self {
            Self::Scalar
            | Self::BTree
            | Self::Bitmap
            | Self::LabelList
            | Self::Inverted
            | Self::NGram
            | Self::ZoneMap
            | Self::BloomFilter
            | Self::RTree => true,
            Self::FragmentReuse
            | Self::MemWal
            | Self::Vector
            | Self::IvfFlat
            | Self::IvfSq
            | Self::IvfPq
            | Self::IvfHnswSq
            | Self::IvfHnswPq
            | Self::IvfHnswFlat
            | Self::IvfRq => false,
        }
    }

    /// Reports whether this is one of the vector index types.
    pub fn is_vector(&self) -> bool {
        match self {
            Self::Vector
            | Self::IvfFlat
            | Self::IvfSq
            | Self::IvfPq
            | Self::IvfHnswSq
            | Self::IvfHnswPq
            | Self::IvfHnswFlat
            | Self::IvfRq => true,
            Self::Scalar
            | Self::BTree
            | Self::Bitmap
            | Self::LabelList
            | Self::Inverted
            | Self::NGram
            | Self::FragmentReuse
            | Self::MemWal
            | Self::ZoneMap
            | Self::BloomFilter
            | Self::RTree => false,
        }
    }

    /// Reports whether this is one of the system index types
    /// (`FragmentReuse` or `MemWal`).
    pub fn is_system(&self) -> bool {
        match self {
            Self::FragmentReuse | Self::MemWal => true,
            Self::Scalar
            | Self::BTree
            | Self::Bitmap
            | Self::LabelList
            | Self::Inverted
            | Self::NGram
            | Self::ZoneMap
            | Self::BloomFilter
            | Self::RTree
            | Self::Vector
            | Self::IvfFlat
            | Self::IvfSq
            | Self::IvfPq
            | Self::IvfHnswSq
            | Self::IvfHnswPq
            | Self::IvfHnswFlat
            | Self::IvfRq => false,
        }
    }

    /// Returns the version number associated with this index type.
    ///
    /// Scalar and system types report `0`, `IvfRq` reports `IVF_RQ_INDEX_VERSION`, and
    /// every other vector type reports `VECTOR_INDEX_VERSION`.
    pub fn version(&self) -> i32 {
        match self {
            Self::Scalar
            | Self::BTree
            | Self::Bitmap
            | Self::LabelList
            | Self::Inverted
            | Self::NGram
            | Self::FragmentReuse
            | Self::MemWal
            | Self::ZoneMap
            | Self::BloomFilter
            | Self::RTree => 0,
            Self::IvfRq => IVF_RQ_INDEX_VERSION as i32,
            Self::Vector
            | Self::IvfFlat
            | Self::IvfSq
            | Self::IvfPq
            | Self::IvfHnswSq
            | Self::IvfHnswPq
            | Self::IvfHnswFlat => VECTOR_INDEX_VERSION as i32,
        }
    }

    /// Returns the target partition size for this index type.
    ///
    /// `IvfFlat` uses 4096, the three HNSW-based types use `1 << 20`, and every other
    /// type (including those reached through the fallback arm) uses 8192.
    pub fn target_partition_size(&self) -> usize {
        match self {
            Self::IvfFlat => 4096,
            Self::IvfHnswFlat | Self::IvfHnswSq | Self::IvfHnswPq => 1 << 20,
            Self::Vector | Self::IvfSq | Self::IvfPq => 8192,
            // Fallback for all remaining types.
            _ => 8192,
        }
    }

    /// Returns the highest version reported by any vector index type.
    ///
    /// Falls back to `VECTOR_INDEX_VERSION` if the candidate list were ever empty.
    pub fn max_vector_version() -> u32 {
        let vector_types = [
            Self::Vector,
            Self::IvfFlat,
            Self::IvfSq,
            Self::IvfPq,
            Self::IvfHnswSq,
            Self::IvfHnswPq,
            Self::IvfHnswFlat,
            Self::IvfRq,
        ];
        vector_types
            .iter()
            .map(|ty| ty.version() as u32)
            .max()
            .unwrap_or(VECTOR_INDEX_VERSION)
    }
}
/// Interface for index parameter objects; implementors must be `Send + Sync`.
pub trait IndexParams: Send + Sync {
/// Returns `self` as `&dyn Any` so callers can downcast to the concrete parameter type.
fn as_any(&self) -> &dyn Any;
/// Returns a name as a string slice.
/// NOTE(review): presumably the name of the index these parameters describe — confirm
/// with the implementors.
fn index_name(&self) -> &str;
}
/// Serde-serializable record holding an index type and a distance type as strings.
///
/// This is a different type from `lance_table::format::IndexMetadata`, which the free
/// functions later in this file take as input.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexMetadata {
/// Index type as a string; (de)serialized under the key `type` rather than `index_type`.
#[serde(rename = "type")]
pub index_type: String,
/// Distance type as a string.
/// NOTE(review): presumably the name of the distance metric of a vector index — confirm.
pub distance_type: String,
}
pub fn is_system_index(index_meta: &lance_table::format::IndexMetadata) -> bool {
index_meta.name == FRAG_REUSE_INDEX_NAME || index_meta.name == MEM_WAL_INDEX_NAME
}
/// Maps the name of `index_meta` to the system index type it denotes.
///
/// Returns `Some(IndexType::FragmentReuse)` for `FRAG_REUSE_INDEX_NAME`,
/// `Some(IndexType::MemWal)` for `MEM_WAL_INDEX_NAME`, and `None` for any other name.
pub fn infer_system_index_type(
    index_meta: &lance_table::format::IndexMetadata,
) -> Option<IndexType> {
    if index_meta.name == FRAG_REUSE_INDEX_NAME {
        return Some(IndexType::FragmentReuse);
    }
    if index_meta.name == MEM_WAL_INDEX_NAME {
        return Some(IndexType::MemWal);
    }
    // Any other name is not a system index.
    None
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `IvfRq` must report its own version constant, and that version must be greater
    /// than the one reported by `IvfPq`.
    #[test]
    fn test_ivf_rq_has_dedicated_index_version() {
        let rq_version = IndexType::IvfRq.version();
        let pq_version = IndexType::IvfPq.version();
        assert!(rq_version > pq_version);
        assert_eq!(rq_version as u32, IVF_RQ_INDEX_VERSION);
    }

    /// The maximum over all vector types must equal the IVF_RQ version constant.
    #[test]
    fn test_max_vector_version_tracks_highest_supported() {
        let highest = IndexType::max_vector_version();
        assert_eq!(highest, IVF_RQ_INDEX_VERSION);
    }
}