use arrow_schema::Schema as ArrowSchema;
use bytes::Bytes;
use lance_core::{Error, Result};
use lance_file::reader::FileReader as V2Reader;
use lance_file::writer::FileWriter;
use lance_linalg::distance::DistanceType;
use prost::Message;
use crate::pb;
use crate::vector::ivf::storage::{IVF_METADATA_KEY, IvfModel};
use crate::vector::pq::storage::PQ_METADATA_KEY;
use crate::vector::sq::storage::SQ_METADATA_KEY;
use crate::vector::{PQ_CODE_COLUMN, SQ_CODE_COLUMN};
use crate::{INDEX_METADATA_SCHEMA_KEY, IndexMetadata as IndexMetaSchema};
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SupportedIvfIndexType {
IvfFlat,
IvfPq,
IvfSq,
IvfHnswFlat,
IvfHnswPq,
IvfHnswSq,
}
impl SupportedIvfIndexType {
pub fn as_str(&self) -> &'static str {
match self {
Self::IvfFlat => "IVF_FLAT",
Self::IvfPq => "IVF_PQ",
Self::IvfSq => "IVF_SQ",
Self::IvfHnswFlat => "IVF_HNSW_FLAT",
Self::IvfHnswPq => "IVF_HNSW_PQ",
Self::IvfHnswSq => "IVF_HNSW_SQ",
}
}
pub fn from_index_type_str(s: &str) -> Option<Self> {
match s {
"IVF_FLAT" => Some(Self::IvfFlat),
"IVF_PQ" => Some(Self::IvfPq),
"IVF_SQ" => Some(Self::IvfSq),
"IVF_HNSW_FLAT" => Some(Self::IvfHnswFlat),
"IVF_HNSW_PQ" => Some(Self::IvfHnswPq),
"IVF_HNSW_SQ" => Some(Self::IvfHnswSq),
_ => None,
}
}
pub fn detect_from_reader_and_schema(reader: &V2Reader, schema: &ArrowSchema) -> Result<Self> {
let has_pq_code_col = schema.fields.iter().any(|f| f.name() == PQ_CODE_COLUMN);
let has_sq_code_col = schema.fields.iter().any(|f| f.name() == SQ_CODE_COLUMN);
let is_pq = reader
.metadata()
.file_schema
.metadata
.contains_key(PQ_METADATA_KEY)
|| has_pq_code_col;
let is_sq = reader
.metadata()
.file_schema
.metadata
.contains_key(SQ_METADATA_KEY)
|| has_sq_code_col;
let has_hnsw_vector_id_col = schema.fields.iter().any(|f| f.name() == "__vector_id");
let has_hnsw_pointer_col = schema.fields.iter().any(|f| f.name() == "__pointer");
let has_hnsw = has_hnsw_vector_id_col || has_hnsw_pointer_col;
let index_type = match (has_hnsw, is_pq, is_sq) {
(false, false, false) => Self::IvfFlat,
(false, true, false) => Self::IvfPq,
(false, false, true) => Self::IvfSq,
(true, false, false) => Self::IvfHnswFlat,
(true, true, false) => Self::IvfHnswPq,
(true, false, true) => Self::IvfHnswSq,
_ => {
return Err(Error::not_supported_source(
"Unsupported index type combination detected".into(),
));
}
};
Ok(index_type)
}
}
pub async fn write_unified_ivf_and_index_metadata(
w: &mut FileWriter,
ivf_model: &IvfModel,
dt: DistanceType,
idx_type: SupportedIvfIndexType,
) -> Result<()> {
let pb_ivf: pb::Ivf = (ivf_model).try_into()?;
let pos = w
.add_global_buffer(Bytes::from(pb_ivf.encode_to_vec()))
.await?;
w.add_schema_metadata(IVF_METADATA_KEY, pos.to_string());
let idx_meta = IndexMetaSchema {
index_type: idx_type.as_str().to_string(),
distance_type: dt.to_string(),
};
w.add_schema_metadata(INDEX_METADATA_SCHEMA_KEY, serde_json::to_string(&idx_meta)?);
Ok(())
}