use std::sync::Arc;
use async_trait::async_trait;
use datafusion::execution::SendableRecordBatchStream;
use lance_core::{Error, Result};
use crate::{IndexParams, IndexType, optimize::OptimizeOptions};
use lance_table::format::IndexMetadata;
use uuid::Uuid;
#[derive(Debug, Default)]
pub struct IndexCriteria<'a> {
pub for_column: Option<&'a str>,
pub has_name: Option<&'a str>,
pub must_support_fts: bool,
pub must_support_exact_equality: bool,
}
impl<'a> IndexCriteria<'a> {
pub fn for_column(mut self, column: &'a str) -> Self {
self.for_column = Some(column);
self
}
pub fn with_name(mut self, name: &'a str) -> Self {
self.has_name = Some(name);
self
}
pub fn supports_fts(mut self) -> Self {
self.must_support_fts = true;
self
}
pub fn supports_exact_equality(mut self) -> Self {
self.must_support_exact_equality = true;
self
}
}
#[deprecated(since = "0.39.0", note = "Use IndexCriteria instead")]
pub type ScalarIndexCriteria<'a> = IndexCriteria<'a>;
pub trait IndexDescription: Send + Sync {
fn name(&self) -> &str;
fn metadata(&self) -> &[IndexMetadata];
fn type_url(&self) -> &str;
fn index_type(&self) -> &str;
fn rows_indexed(&self) -> u64;
fn field_ids(&self) -> &[u32];
fn details(&self) -> Result<String>;
}
#[async_trait]
pub trait DatasetIndexExt {
type IndexBuilder<'a>
where
Self: 'a;
fn create_index_builder<'a>(
&'a mut self,
columns: &'a [&'a str],
index_type: IndexType,
params: &'a dyn IndexParams,
) -> Self::IndexBuilder<'a>;
async fn create_index(
&mut self,
columns: &[&str],
index_type: IndexType,
name: Option<String>,
params: &dyn IndexParams,
replace: bool,
) -> Result<IndexMetadata>;
async fn drop_index(&mut self, name: &str) -> Result<()>;
async fn prewarm_index(&self, name: &str) -> Result<()>;
async fn load_indices(&self) -> Result<Arc<Vec<IndexMetadata>>>;
async fn load_index(&self, uuid: &str) -> Result<Option<IndexMetadata>> {
self.load_indices().await.map(|indices| {
indices
.iter()
.find(|idx| idx.uuid.to_string() == uuid)
.cloned()
})
}
async fn load_indices_by_name(&self, name: &str) -> Result<Vec<IndexMetadata>> {
self.load_indices().await.map(|indices| {
indices
.iter()
.filter(|idx| idx.name == name)
.cloned()
.collect()
})
}
async fn load_index_by_name(&self, name: &str) -> Result<Option<IndexMetadata>> {
let indices = self.load_indices_by_name(name).await?;
if indices.is_empty() {
Ok(None)
} else if indices.len() == 1 {
Ok(Some(indices[0].clone()))
} else {
Err(Error::index(format!(
"Found multiple indices of the same name: {:?}, please use load_indices_by_name",
indices.iter().map(|idx| &idx.name).collect::<Vec<_>>()
)))
}
}
async fn describe_indices<'a, 'b>(
&'a self,
criteria: Option<IndexCriteria<'b>>,
) -> Result<Vec<Arc<dyn IndexDescription>>>;
async fn load_scalar_index<'a, 'b>(
&'a self,
criteria: IndexCriteria<'b>,
) -> Result<Option<IndexMetadata>>;
async fn optimize_indices(&mut self, options: &OptimizeOptions) -> Result<()>;
async fn index_statistics(&self, index_name: &str) -> Result<String>;
async fn commit_existing_index(
&mut self,
index_name: &str,
column: &str,
index_id: Uuid,
) -> Result<()>;
async fn read_index_partition(
&self,
index_name: &str,
partition_id: usize,
with_vector: bool,
) -> Result<SendableRecordBatchStream>;
}