pub trait VectorIndex:
Send
+ Sync
+ Debug
+ Index {
Show 23 methods
// Required methods
fn search<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
metrics: &'life2 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait;
fn find_partitions(
&self,
query: &Query,
) -> Result<(UInt32Array, Float32Array)>;
fn total_partitions(&self) -> usize;
fn search_in_partition<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
partition_id: usize,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
metrics: &'life2 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait;
fn is_loadable(&self) -> bool;
fn use_residual(&self) -> bool;
fn load<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn to_batch_stream<'life0, 'async_trait>(
&'life0 self,
with_vector: bool,
) -> Pin<Box<dyn Future<Output = Result<SendableRecordBatchStream>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn num_rows(&self) -> u64;
fn row_ids(&self) -> Box<dyn Iterator<Item = &u64> + '_>;
fn remap<'life0, 'life1, 'async_trait>(
&'life0 mut self,
mapping: &'life1 HashMap<u64, Option<u64>>,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
fn metric_type(&self) -> DistanceType;
fn ivf_model(&self) -> &IvfModel;
fn quantizer(&self) -> Quantizer;
fn partition_size(&self, part_id: usize) -> usize;
fn sub_index_type(&self) -> (SubIndexType, QuantizationType);
// Provided methods
fn prepare_partition_search<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
_partition_id: usize,
_query: &'life1 Query,
_pre_filter: Arc<dyn PreFilter>,
_metrics: &'life2 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<PreparedPartitionSearchHandle>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait { ... }
fn search_prepared_partition(
&self,
_prepared: PreparedPartitionSearchHandle,
_metrics: &dyn MetricsCollector,
) -> Result<RecordBatch> { ... }
fn supports_prepared_partition_search(&self) -> bool { ... }
fn auto_query_parallelism(&self, _cpu_pool_size: usize) -> usize { ... }
fn search_partitions<'async_trait>(
self: Arc<Self>,
query: Query,
partitions: Arc<UInt32Array>,
q_c_dists: Arc<Float32Array>,
start_idx: usize,
end_idx: usize,
pre_filter: Arc<dyn PreFilter>,
control: Option<Arc<dyn PartitionSearchControl>>,
metrics: Arc<dyn MetricsCollector>,
) -> Pin<Box<dyn Future<Output = Result<SendableRecordBatchStream>> + Send + 'async_trait>>
where Self: 'static + 'async_trait { ... }
fn load_partition<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
_partition_id: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait { ... }
fn partition_reader<'life0, 'life1, 'async_trait>(
&'life0 self,
_partition_id: usize,
_with_vector: bool,
_metrics: &'life1 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<SendableRecordBatchStream>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait { ... }
}Expand description
Vector Index for (Approximate) Nearest Neighbor (ANN) Search.
Vector indices are often built as a chain of indices. For example, IVF -> PQ or IVF -> HNSW -> SQ.
We use one trait for both the top-level and the sub-indices. Typically the top-level search is a partition-aware search and all sub-indices are whole-index searches.
Required Methods§
Sourcefn search<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
metrics: &'life2 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
fn search<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
metrics: &'life2 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
Search entire index for k nearest neighbors.
It returns a RecordBatch with Schema of:
use arrow_schema::{Schema, Field, DataType};
Schema::new(vec![
Field::new("_rowid", DataType::UInt64, true),
Field::new("_distance", DataType::Float32, true),
]);The pre_filter argument is used to filter out row ids that we know are
not relevant to the query. For example, it removes deleted rows or rows that
do not match a user-provided filter.
Sourcefn find_partitions(&self, query: &Query) -> Result<(UInt32Array, Float32Array)>
fn find_partitions(&self, query: &Query) -> Result<(UInt32Array, Float32Array)>
Find partitions that may contain nearest neighbors.
If maximum_nprobes is set then this method will return the partitions that are most likely to contain the nearest neighbors (e.g. the closest partitions to the query vector).
Return the partition ids and the distances between the query and the centroids, the results should be in sorted order from closest to farthest.
Sourcefn total_partitions(&self) -> usize
fn total_partitions(&self) -> usize
Get the total number of partitions in the index.
Sourcefn search_in_partition<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
partition_id: usize,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
metrics: &'life2 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
fn search_in_partition<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
partition_id: usize,
query: &'life1 Query,
pre_filter: Arc<dyn PreFilter>,
metrics: &'life2 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<RecordBatch>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
Search a single partition for nearest neighbors.
This method should return the same results as VectorIndex::search method except
that it will only search a single partition.
Sourcefn is_loadable(&self) -> bool
fn is_loadable(&self) -> bool
If the index is loadable by IVF, so it can be a sub-index that is loaded on demand by IVF.
Sourcefn use_residual(&self) -> bool
fn use_residual(&self) -> bool
Use residual vector to search.
Sourcefn load<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn load<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Load the index from the reader on-demand.
fn to_batch_stream<'life0, 'async_trait>(
&'life0 self,
with_vector: bool,
) -> Pin<Box<dyn Future<Output = Result<SendableRecordBatchStream>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn num_rows(&self) -> u64
Sourcefn remap<'life0, 'life1, 'async_trait>(
&'life0 mut self,
mapping: &'life1 HashMap<u64, Option<u64>>,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn remap<'life0, 'life1, 'async_trait>(
&'life0 mut self,
mapping: &'life1 HashMap<u64, Option<u64>>,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Remap the index according to mapping
Each item in mapping describes an old row id -> new row id pair. If old row id -> None then that row id has been deleted and can be removed from the index.
If an old row id is not in the mapping then it should be left alone.
Sourcefn metric_type(&self) -> DistanceType
fn metric_type(&self) -> DistanceType
The metric type of this vector index.
fn ivf_model(&self) -> &IvfModel
fn quantizer(&self) -> Quantizer
fn partition_size(&self, part_id: usize) -> usize
Sourcefn sub_index_type(&self) -> (SubIndexType, QuantizationType)
fn sub_index_type(&self) -> (SubIndexType, QuantizationType)
the index type of this vector index.
Provided Methods§
Sourcefn prepare_partition_search<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
_partition_id: usize,
_query: &'life1 Query,
_pre_filter: Arc<dyn PreFilter>,
_metrics: &'life2 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<PreparedPartitionSearchHandle>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
fn prepare_partition_search<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
_partition_id: usize,
_query: &'life1 Query,
_pre_filter: Arc<dyn PreFilter>,
_metrics: &'life2 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<PreparedPartitionSearchHandle>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
Asynchronously prepare a single-partition search so the CPU-heavy portion can be executed separately.
Sourcefn search_prepared_partition(
&self,
_prepared: PreparedPartitionSearchHandle,
_metrics: &dyn MetricsCollector,
) -> Result<RecordBatch>
fn search_prepared_partition( &self, _prepared: PreparedPartitionSearchHandle, _metrics: &dyn MetricsCollector, ) -> Result<RecordBatch>
Execute the synchronous portion of a previously prepared partition search.
Sourcefn supports_prepared_partition_search(&self) -> bool
fn supports_prepared_partition_search(&self) -> bool
Return true if the index supports splitting partition search into async prepare and sync execute phases.
Sourcefn auto_query_parallelism(&self, _cpu_pool_size: usize) -> usize
fn auto_query_parallelism(&self, _cpu_pool_size: usize) -> usize
Choose partition search concurrency for query_parallelism = 0.
The default keeps the single-worker sequential path. Index implementations can override this when their sub-index search work does not benefit from the sequential fast path.
Sourcefn search_partitions<'async_trait>(
self: Arc<Self>,
query: Query,
partitions: Arc<UInt32Array>,
q_c_dists: Arc<Float32Array>,
start_idx: usize,
end_idx: usize,
pre_filter: Arc<dyn PreFilter>,
control: Option<Arc<dyn PartitionSearchControl>>,
metrics: Arc<dyn MetricsCollector>,
) -> Pin<Box<dyn Future<Output = Result<SendableRecordBatchStream>> + Send + 'async_trait>>where
Self: 'static + 'async_trait,
fn search_partitions<'async_trait>(
self: Arc<Self>,
query: Query,
partitions: Arc<UInt32Array>,
q_c_dists: Arc<Float32Array>,
start_idx: usize,
end_idx: usize,
pre_filter: Arc<dyn PreFilter>,
control: Option<Arc<dyn PartitionSearchControl>>,
metrics: Arc<dyn MetricsCollector>,
) -> Pin<Box<dyn Future<Output = Result<SendableRecordBatchStream>> + Send + 'async_trait>>where
Self: 'static + 'async_trait,
Search a range of partitions and return a stream of per-partition result batches.
The default implementation searches each partition sequentially with
VectorIndex::search_in_partition. Implementations can override this
to use a more efficient execution strategy.
Sourcefn load_partition<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
_partition_id: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn load_partition<'life0, 'async_trait>(
&'life0 self,
reader: Arc<dyn Reader>,
offset: usize,
length: usize,
_partition_id: usize,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn VectorIndex>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Load the partition from the reader on-demand.
fn partition_reader<'life0, 'life1, 'async_trait>(
&'life0 self,
_partition_id: usize,
_with_vector: bool,
_metrics: &'life1 dyn MetricsCollector,
) -> Pin<Box<dyn Future<Output = Result<SendableRecordBatchStream>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Dyn Compatibility§
This trait is dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety".