#![doc = document_features::document_features!()]
#![allow(clippy::needless_range_loop)] #![warn(missing_docs)]
#[cfg(feature = "mimalloc")]
use mimalloc::MiMalloc;
#[cfg(feature = "mimalloc")]
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
pub mod cpu;
pub mod prelude;
pub mod utils;
#[cfg(feature = "gpu")]
pub mod gpu;
#[cfg(feature = "quantised")]
pub mod quantised;
#[cfg(feature = "binary")]
pub mod binary;
use faer::MatRef;
use rayon::prelude::*;
use std::sync::{
atomic::{AtomicUsize, Ordering},
Arc,
};
use thousands::*;
#[cfg(feature = "gpu")]
use cubecl::prelude::*;
#[cfg(feature = "quantised")]
use std::ops::AddAssign;
#[cfg(feature = "binary")]
use bytemuck::Pod;
#[cfg(feature = "binary")]
use std::path::Path;
use crate::cpu::{
annoy::*, ball_tree::*, exhaustive::*, hnsw::*, ivf::*, kd_forest::*, lsh::*, nndescent::*,
vamana::*,
};
use crate::prelude::*;
#[cfg(feature = "binary")]
use crate::binary::{exhaustive_binary::*, exhaustive_rabitq::*, ivf_binary::*, ivf_rabitq::*};
#[cfg(feature = "gpu")]
use crate::gpu::{exhaustive_gpu::*, ivf_gpu::*, nndescent_gpu::*};
#[cfg(feature = "quantised")]
use crate::quantised::{
exhaustive_bf16::*, exhaustive_opq::*, exhaustive_pq::*, exhaustive_sq8::*, ivf_bf16::*,
ivf_opq::*, ivf_pq::*, ivf_sq8::*,
};
fn query_parallel<T, F>(
n_samples: usize,
return_dist: bool,
verbose: bool,
query_fn: F,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
T: Send,
F: Fn(usize) -> (Vec<usize>, Vec<T>) + Sync,
{
let counter = Arc::new(AtomicUsize::new(0));
let results: Vec<(Vec<usize>, Vec<T>)> = (0..n_samples)
.into_par_iter()
.map(|i| {
let result = query_fn(i);
if verbose {
let count = counter.fetch_add(1, Ordering::Relaxed) + 1;
if count.is_multiple_of(100_000) {
println!(
" Processed {} / {} samples.",
count.separate_with_underscores(),
n_samples.separate_with_underscores()
);
}
}
result
})
.collect();
if return_dist {
let (indices, distances) = results.into_iter().unzip();
(indices, Some(distances))
} else {
let indices: Vec<Vec<usize>> = results.into_iter().map(|(idx, _)| idx).collect();
(indices, None)
}
}
fn query_parallel_with_flags<T, F>(
n_samples: usize,
return_dist: bool,
verbose: bool,
query_fn: F,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
T: Send,
F: Fn(usize) -> (Vec<usize>, Vec<T>, bool) + Sync,
{
let counter = Arc::new(AtomicUsize::new(0));
let results: Vec<(Vec<usize>, Vec<T>, bool)> = (0..n_samples)
.into_par_iter()
.map(|i| {
let result = query_fn(i);
if verbose {
let count = counter.fetch_add(1, Ordering::Relaxed) + 1;
if count.is_multiple_of(100_000) {
println!(
" Processed {} / {} samples.",
count.separate_with_underscores(),
n_samples.separate_with_underscores()
);
}
}
result
})
.collect();
let mut random: usize = 0;
let mut indices: Vec<Vec<usize>> = Vec::with_capacity(results.len());
let mut distances: Vec<Vec<T>> = Vec::with_capacity(results.len());
for (idx, dist, rnd) in results {
if rnd {
random += 1;
}
indices.push(idx);
distances.push(dist);
}
if (random as f32) / (n_samples as f32) >= 0.01 {
println!("More than 1% of samples were not represented in the buckets.");
println!("Please verify underlying data");
}
if return_dist {
(indices, Some(distances))
} else {
(indices, None)
}
}
/// Builds an exhaustive (brute-force) nearest-neighbour index over the rows
/// of `mat`. Unrecognised `dist_metric` strings fall back to the default
/// distance metric.
pub fn build_exhaustive_index<T>(mat: MatRef<T>, dist_metric: &str) -> ExhaustiveIndex<T>
where
    T: AnnSearchFloat,
{
    ExhaustiveIndex::new(mat, parse_ann_dist(dist_metric).unwrap_or_default())
}
/// Queries an exhaustive index with every row of `query_mat` in parallel.
///
/// Returns the `k` nearest-neighbour indices per query row and, when
/// `return_dist` is true, the matching distances. `verbose` enables periodic
/// progress output.
pub fn query_exhaustive_index<T>(
    query_mat: MatRef<T>,
    index: &ExhaustiveIndex<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k)
    })
}
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// exhaustive index itself (self-query), delegating to `generate_knn`.
pub fn query_exhaustive_self<T>(
    index: &ExhaustiveIndex<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    index.generate_knn(k, return_dist, verbose)
}
/// Builds an Annoy-style index with `n_trees` random-projection trees over
/// the rows of `mat`. Unrecognised `dist_metric` strings fall back to the
/// default distance metric; `seed` controls tree construction.
pub fn build_annoy_index<T>(
    mat: MatRef<T>,
    dist_metric: String,
    n_trees: usize,
    seed: usize,
) -> AnnoyIndex<T>
where
    T: AnnSearchFloat,
{
    AnnoyIndex::new(
        mat,
        n_trees,
        parse_ann_dist(&dist_metric).unwrap_or_default(),
        seed,
    )
}
/// Queries an Annoy index with every row of `query_mat` in parallel.
///
/// `search_budget` optionally bounds the per-query search effort; distances
/// are returned only when `return_dist` is true.
pub fn query_annoy_index<T>(
    query_mat: MatRef<T>,
    index: &AnnoyIndex<T>,
    k: usize,
    search_budget: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, search_budget)
    })
}
/// Computes the k-nearest-neighbour graph of the vectors stored in the Annoy
/// index itself, delegating to `generate_knn`.
pub fn query_annoy_self<T>(
    index: &AnnoyIndex<T>,
    k: usize,
    search_budget: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    index.generate_knn(k, search_budget, return_dist, verbose)
}
/// Builds a ball-tree index over the rows of `mat`. Unrecognised
/// `dist_metric` strings fall back to the default distance metric.
pub fn build_balltree_index<T>(mat: MatRef<T>, dist_metric: String, seed: usize) -> BallTreeIndex<T>
where
    T: AnnSearchFloat,
{
    let ann_dist = parse_ann_dist(&dist_metric).unwrap_or_default();
    BallTreeIndex::new(mat, ann_dist, seed)
}
/// Queries a ball-tree index with every row of `query_mat` in parallel.
///
/// `search_budget` optionally bounds the per-query search effort; distances
/// are returned only when `return_dist` is true.
pub fn query_balltree_index<T>(
    query_mat: MatRef<T>,
    index: &BallTreeIndex<T>,
    k: usize,
    search_budget: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, search_budget)
    })
}
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// ball-tree index itself, delegating to `generate_knn`.
pub fn query_balltree_self<T>(
    index: &BallTreeIndex<T>,
    k: usize,
    search_budget: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    index.generate_knn(k, search_budget, return_dist, verbose)
}
/// Builds an HNSW graph index over the rows of `mat`.
///
/// `m` is the per-node connectivity and `ef_construction` the candidate-list
/// size used during construction; metric parsing is handled inside
/// `HnswIndex::build`.
pub fn build_hnsw_index<T>(
    mat: MatRef<T>,
    m: usize,
    ef_construction: usize,
    dist_metric: &str,
    seed: usize,
    verbose: bool,
) -> HnswIndex<T>
where
    T: AnnSearchFloat,
    HnswIndex<T>: HnswState<T>,
{
    HnswIndex::build(mat, m, ef_construction, dist_metric, seed, verbose)
}
/// Queries an HNSW index with every row of `query_mat` in parallel.
///
/// `ef_search` is the search-time candidate-list size; distances are
/// returned only when `return_dist` is true.
pub fn query_hnsw_index<T>(
    query_mat: MatRef<T>,
    index: &HnswIndex<T>,
    k: usize,
    ef_search: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
    HnswIndex<T>: HnswState<T>,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, ef_search)
    })
}
/// Computes the k-nearest-neighbour graph of the vectors stored in the HNSW
/// index itself, delegating to `generate_knn`.
pub fn query_hnsw_self<T>(
    index: &HnswIndex<T>,
    k: usize,
    ef_search: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
    HnswIndex<T>: HnswState<T>,
{
    index.generate_knn(k, ef_search, return_dist, verbose)
}
/// Builds an IVF (inverted-file) index over the rows of `mat`.
///
/// `nlist` and `max_iters` tune the k-means coarse quantiser; unrecognised
/// `dist_metric` strings fall back to the default distance metric.
pub fn build_ivf_index<T>(
    mat: MatRef<T>,
    nlist: Option<usize>,
    max_iters: Option<usize>,
    dist_metric: &str,
    seed: usize,
    verbose: bool,
) -> IvfIndex<T>
where
    T: AnnSearchFloat,
{
    IvfIndex::build(
        mat,
        parse_ann_dist(dist_metric).unwrap_or_default(),
        nlist,
        max_iters,
        seed,
        verbose,
    )
}
/// Queries an IVF index with every row of `query_mat` in parallel.
///
/// `nprobe` optionally sets how many inverted lists are probed per query;
/// distances are returned only when `return_dist` is true.
pub fn query_ivf_index<T>(
    query_mat: MatRef<T>,
    index: &IvfIndex<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, nprobe)
    })
}
/// Computes the k-nearest-neighbour graph of the vectors stored in the IVF
/// index itself, delegating to `generate_knn`.
pub fn query_ivf_self<T>(
    index: &IvfIndex<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    index.generate_knn(k, nprobe, return_dist, verbose)
}
/// Builds a k-d tree forest index with `n_trees` trees over the rows of
/// `mat`. Unrecognised `dist_metric` strings fall back to the default
/// distance metric.
pub fn build_kd_tree_index<T>(
    mat: MatRef<T>,
    dist_metric: String,
    n_trees: usize,
    seed: usize,
) -> KdTreeIndex<T>
where
    T: AnnSearchFloat,
{
    let ann_dist = parse_ann_dist(&dist_metric).unwrap_or_default();
    KdTreeIndex::new(mat, n_trees, ann_dist, seed)
}
/// Queries a k-d tree forest index with every row of `query_mat` in
/// parallel.
///
/// `search_budget` optionally bounds the per-query search effort; distances
/// are returned only when `return_dist` is true.
pub fn query_kd_tree_index<T>(
    query_mat: MatRef<T>,
    index: &KdTreeIndex<T>,
    k: usize,
    search_budget: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, search_budget)
    })
}
/// Computes the k-nearest-neighbour graph of the vectors stored in the k-d
/// tree index itself, delegating to `generate_knn`.
pub fn query_kd_tree_self<T>(
    index: &KdTreeIndex<T>,
    k: usize,
    search_budget: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    index.generate_knn(k, search_budget, return_dist, verbose)
}
/// Builds a locality-sensitive-hashing index over the rows of `mat` with
/// `num_tables` hash tables of `bits_per_hash` bits each. Unrecognised
/// `dist_metric` strings fall back to the default distance metric.
pub fn build_lsh_index<T>(
    mat: MatRef<T>,
    dist_metric: &str,
    num_tables: usize,
    bits_per_hash: usize,
    seed: usize,
) -> LSHIndex<T>
where
    T: AnnSearchFloat,
{
    LSHIndex::new(
        mat,
        parse_ann_dist(dist_metric).unwrap_or_default(),
        num_tables,
        bits_per_hash,
        seed,
    )
}
/// Queries an LSH index with every row of `query_mat` in parallel.
///
/// Uses [`query_parallel_with_flags`] so that queries that hit no hash
/// bucket are counted, triggering a warning when they exceed 1% of samples.
/// `n_probe` controls multi-probe breadth and `max_candidates` optionally
/// caps the candidate set per query.
pub fn query_lsh_index<T>(
    query_mat: MatRef<T>,
    index: &LSHIndex<T>,
    k: usize,
    n_probe: usize,
    max_candidates: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel_with_flags(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, max_candidates, n_probe)
    })
}
/// Computes the k-nearest-neighbour graph of the vectors stored in the LSH
/// index itself.
///
/// When `n_probe` is `None` it defaults to half the index's hash width
/// (`index.num_bits() / 2`).
pub fn query_lsh_self<T>(
    index: &LSHIndex<T>,
    k: usize,
    n_probe: Option<usize>,
    max_candidates: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    let n_probe = n_probe.unwrap_or(index.num_bits() / 2);
    index.generate_knn(k, max_candidates, n_probe, return_dist, verbose)
}
#[allow(clippy::too_many_arguments)]
/// Builds an NN-Descent k-NN graph index over the rows of `mat`.
///
/// `delta` is the convergence threshold and `diversify_prob` the graph
/// diversification probability; `k`, `max_iter`, `max_candidates` and
/// `n_tree` fall back to `NNDescent` defaults when `None`. Unlike the other
/// builders, an unrecognised `dist_metric` falls back to cosine distance.
pub fn build_nndescent_index<T>(
    mat: MatRef<T>,
    dist_metric: &str,
    delta: T,
    diversify_prob: T,
    k: Option<usize>,
    max_iter: Option<usize>,
    max_candidates: Option<usize>,
    n_tree: Option<usize>,
    seed: usize,
    verbose: bool,
) -> NNDescent<T>
where
    T: AnnSearchFloat,
    NNDescent<T>: ApplySortedUpdates<T>,
    NNDescent<T>: NNDescentQuery<T>,
{
    let metric = parse_ann_dist(dist_metric).unwrap_or(Dist::Cosine);
    NNDescent::new(
        mat,
        metric,
        k,
        max_candidates,
        max_iter,
        n_tree,
        delta,
        diversify_prob,
        seed,
        verbose,
    )
}
/// Queries an NN-Descent index with every row of `query_mat` in parallel.
///
/// `ef_search` optionally widens the beam search; distances are returned
/// only when `return_dist` is true.
pub fn query_nndescent_index<T>(
    query_mat: MatRef<T>,
    index: &NNDescent<T>,
    k: usize,
    ef_search: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
    NNDescent<T>: ApplySortedUpdates<T>,
    NNDescent<T>: NNDescentQuery<T>,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, ef_search)
    })
}
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// NN-Descent index itself, delegating to `generate_knn`.
pub fn query_nndescent_self<T>(
    index: &NNDescent<T>,
    k: usize,
    ef_search: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
    NNDescent<T>: ApplySortedUpdates<T>,
    NNDescent<T>: NNDescentQuery<T>,
{
    index.generate_knn(k, ef_search, return_dist, verbose)
}
/// Builds a Vamana (DiskANN-style) graph index over the rows of `mat`.
///
/// `r` is the maximum graph degree, `l_build` the construction beam width,
/// and `alpha_pass1`/`alpha_pass2` the pruning parameters for the two build
/// passes. Unlike most builders here, an unrecognised `dist_metric` falls
/// back to Euclidean distance.
pub fn build_vamana_index<T>(
    mat: MatRef<T>,
    r: usize,
    l_build: usize,
    alpha_pass1: f32,
    alpha_pass2: f32,
    dist_metric: &str,
    seed: usize,
) -> VamanaIndex<T>
where
    T: AnnSearchFloat,
    VamanaIndex<T>: VamanaState<T>,
{
    VamanaIndex::build(
        mat,
        parse_ann_dist(dist_metric).unwrap_or(Dist::Euclidean),
        r,
        l_build,
        alpha_pass1,
        alpha_pass2,
        seed,
    )
}
/// Queries a Vamana index with every row of `query_mat` in parallel.
///
/// `ef_search` optionally widens the beam search; distances are returned
/// only when `return_dist` is true.
pub fn query_vamana_index<T>(
    query_mat: MatRef<T>,
    index: &VamanaIndex<T>,
    k: usize,
    ef_search: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
    VamanaIndex<T>: VamanaState<T>,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, ef_search)
    })
}
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// Vamana index itself, delegating to `generate_knn`.
pub fn query_vamana_self<T>(
    index: &VamanaIndex<T>,
    k: usize,
    ef_search: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
    VamanaIndex<T>: VamanaState<T>,
{
    index.generate_knn(k, ef_search, return_dist, verbose)
}
#[cfg(feature = "quantised")]
/// Builds an exhaustive index whose vectors are stored in bf16 precision.
/// Unrecognised `dist_metric` strings fall back to the default distance
/// metric.
pub fn build_exhaustive_bf16_index<T>(
    mat: MatRef<T>,
    dist_metric: &str,
    verbose: bool,
) -> ExhaustiveIndexBf16<T>
where
    T: AnnSearchFloat + Bf16Compatible,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    if verbose {
        println!(
            "Building exhaustive BF16 index with {} samples",
            mat.nrows()
        );
    }
    ExhaustiveIndexBf16::new(mat, ann_dist)
}
#[cfg(feature = "quantised")]
/// Queries an exhaustive bf16 index with every row of `query_mat` in
/// parallel, returning `k` neighbours per row and optionally distances.
pub fn query_exhaustive_bf16_index<T>(
    query_mat: MatRef<T>,
    index: &ExhaustiveIndexBf16<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Bf16Compatible,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k)
    })
}
#[cfg(feature = "quantised")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the bf16
/// exhaustive index itself, delegating to `generate_knn`.
pub fn query_exhaustive_bf16_self<T>(
    index: &ExhaustiveIndexBf16<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Bf16Compatible,
{
    index.generate_knn(k, return_dist, verbose)
}
#[cfg(feature = "quantised")]
/// Builds an exhaustive index with 8-bit scalar-quantised (SQ8) vectors.
/// Unrecognised `dist_metric` strings fall back to the default distance
/// metric.
pub fn build_exhaustive_sq8_index<T>(
    mat: MatRef<T>,
    dist_metric: &str,
    verbose: bool,
) -> ExhaustiveSq8Index<T>
where
    T: AnnSearchFloat,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    if verbose {
        println!("Building exhaustive SQ8 index with {} samples", mat.nrows());
    }
    ExhaustiveSq8Index::new(mat, ann_dist)
}
#[cfg(feature = "quantised")]
/// Queries an exhaustive SQ8 index with every row of `query_mat` in
/// parallel, returning `k` neighbours per row and optionally distances.
pub fn query_exhaustive_sq8_index<T>(
    query_mat: MatRef<T>,
    index: &ExhaustiveSq8Index<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k)
    })
}
#[cfg(feature = "quantised")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the SQ8
/// exhaustive index itself, delegating to `generate_knn`.
pub fn query_exhaustive_sq8_self<T>(
    index: &ExhaustiveSq8Index<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    index.generate_knn(k, return_dist, verbose)
}
#[cfg(feature = "quantised")]
#[allow(clippy::too_many_arguments)]
/// Builds an exhaustive index with product-quantised (PQ) vectors.
///
/// `m` is the number of PQ subspaces; `max_iters` and `n_pq_centroids` tune
/// the codebook k-means and fall back to defaults when `None`.
pub fn build_exhaustive_pq_index<T>(
    mat: MatRef<T>,
    m: usize,
    max_iters: Option<usize>,
    n_pq_centroids: Option<usize>,
    dist_metric: &str,
    seed: usize,
    verbose: bool,
) -> ExhaustivePqIndex<T>
where
    T: AnnSearchFloat,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    ExhaustivePqIndex::build(mat, m, ann_dist, max_iters, n_pq_centroids, seed, verbose)
}
#[cfg(feature = "quantised")]
/// Queries an exhaustive PQ index with every row of `query_mat` in
/// parallel, returning `k` neighbours per row and optionally distances.
pub fn query_exhaustive_pq_index<T>(
    query_mat: MatRef<T>,
    index: &ExhaustivePqIndex<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k)
    })
}
#[cfg(feature = "quantised")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the PQ
/// exhaustive index itself, delegating to `generate_knn`.
pub fn query_exhaustive_pq_index_self<T>(
    index: &ExhaustivePqIndex<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    index.generate_knn(k, return_dist, verbose)
}
#[cfg(feature = "quantised")]
#[allow(clippy::too_many_arguments)]
/// Builds an exhaustive index with optimised-product-quantised (OPQ)
/// vectors.
///
/// `m` is the number of subspaces; `max_iters` and `n_pq_centroids` tune the
/// codebook k-means and fall back to defaults when `None`.
pub fn build_exhaustive_opq_index<T>(
    mat: MatRef<T>,
    m: usize,
    max_iters: Option<usize>,
    n_pq_centroids: Option<usize>,
    dist_metric: &str,
    seed: usize,
    verbose: bool,
) -> ExhaustiveOpqIndex<T>
where
    T: AnnSearchFloat + AddAssign,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    ExhaustiveOpqIndex::build(mat, m, ann_dist, max_iters, n_pq_centroids, seed, verbose)
}
#[cfg(feature = "quantised")]
/// Queries an exhaustive OPQ index with every row of `query_mat` in
/// parallel, returning `k` neighbours per row and optionally distances.
pub fn query_exhaustive_opq_index<T>(
    query_mat: MatRef<T>,
    index: &ExhaustiveOpqIndex<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + AddAssign,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k)
    })
}
#[cfg(feature = "quantised")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the OPQ
/// exhaustive index itself, delegating to `generate_knn`.
pub fn query_exhaustive_opq_index_self<T>(
    index: &ExhaustiveOpqIndex<T>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + AddAssign,
{
    index.generate_knn(k, return_dist, verbose)
}
#[cfg(feature = "quantised")]
/// Builds an IVF index whose stored vectors use bf16 precision.
///
/// `nlist` and `max_iters` tune the k-means coarse quantiser; unrecognised
/// `dist_metric` strings fall back to the default distance metric.
pub fn build_ivf_bf16_index<T>(
    mat: MatRef<T>,
    nlist: Option<usize>,
    max_iters: Option<usize>,
    dist_metric: &str,
    seed: usize,
    verbose: bool,
) -> IvfIndexBf16<T>
where
    T: AnnSearchFloat + Bf16Compatible,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    IvfIndexBf16::build(mat, ann_dist, nlist, max_iters, seed, verbose)
}
#[cfg(feature = "quantised")]
/// Queries an IVF-bf16 index with every row of `query_mat` in parallel.
///
/// `nprobe` optionally sets how many inverted lists are probed per query.
pub fn query_ivf_bf16_index<T>(
    query_mat: MatRef<T>,
    index: &IvfIndexBf16<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Bf16Compatible,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, nprobe)
    })
}
#[cfg(feature = "quantised")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// IVF-bf16 index itself, delegating to `generate_knn`.
pub fn query_ivf_bf16_self<T>(
    index: &IvfIndexBf16<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Bf16Compatible,
{
    index.generate_knn(k, nprobe, return_dist, verbose)
}
#[cfg(feature = "quantised")]
/// Builds an IVF index with 8-bit scalar-quantised (SQ8) vectors.
///
/// `nlist` and `max_iters` tune the k-means coarse quantiser; unrecognised
/// `dist_metric` strings fall back to the default distance metric.
pub fn build_ivf_sq8_index<T>(
    mat: MatRef<T>,
    nlist: Option<usize>,
    max_iters: Option<usize>,
    dist_metric: &str,
    seed: usize,
    verbose: bool,
) -> IvfSq8Index<T>
where
    T: AnnSearchFloat,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    IvfSq8Index::build(mat, nlist, ann_dist, max_iters, seed, verbose)
}
#[cfg(feature = "quantised")]
/// Queries an IVF-SQ8 index with every row of `query_mat` in parallel.
///
/// `nprobe` optionally sets how many inverted lists are probed per query.
pub fn query_ivf_sq8_index<T>(
    query_mat: MatRef<T>,
    index: &IvfSq8Index<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, nprobe)
    })
}
#[cfg(feature = "quantised")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// IVF-SQ8 index itself, delegating to `generate_knn`.
pub fn query_ivf_sq8_self<T>(
    index: &IvfSq8Index<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    index.generate_knn(k, nprobe, return_dist, verbose)
}
#[cfg(feature = "quantised")]
#[allow(clippy::too_many_arguments)]
/// Builds an IVF index with product-quantised (PQ) residual vectors.
///
/// `nlist`/`max_iters` tune the coarse quantiser, `m` is the number of PQ
/// subspaces, and `n_pq_centroids` sizes the PQ codebooks; `None` values
/// fall back to defaults.
pub fn build_ivf_pq_index<T>(
    mat: MatRef<T>,
    nlist: Option<usize>,
    m: usize,
    max_iters: Option<usize>,
    n_pq_centroids: Option<usize>,
    dist_metric: &str,
    seed: usize,
    verbose: bool,
) -> IvfPqIndex<T>
where
    T: AnnSearchFloat,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    IvfPqIndex::build(
        mat,
        nlist,
        m,
        ann_dist,
        max_iters,
        n_pq_centroids,
        seed,
        verbose,
    )
}
#[cfg(feature = "quantised")]
/// Queries an IVF-PQ index with every row of `query_mat` in parallel.
///
/// `nprobe` optionally sets how many inverted lists are probed per query.
pub fn query_ivf_pq_index<T>(
    query_mat: MatRef<T>,
    index: &IvfPqIndex<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, nprobe)
    })
}
#[cfg(feature = "quantised")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// IVF-PQ index itself, delegating to `generate_knn`.
pub fn query_ivf_pq_index_self<T>(
    index: &IvfPqIndex<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat,
{
    index.generate_knn(k, nprobe, return_dist, verbose)
}
#[cfg(feature = "quantised")]
#[allow(clippy::too_many_arguments)]
/// Builds an IVF index with optimised-product-quantised (OPQ) residual
/// vectors.
///
/// `nlist`/`max_iters` tune the coarse quantiser, `m` is the number of
/// subspaces, and `n_opq_centroids`/`n_opq_iter` tune the OPQ codebooks and
/// rotation training; `None` values fall back to defaults.
pub fn build_ivf_opq_index<T>(
    mat: MatRef<T>,
    nlist: Option<usize>,
    m: usize,
    max_iters: Option<usize>,
    n_opq_centroids: Option<usize>,
    n_opq_iter: Option<usize>,
    dist_metric: &str,
    seed: usize,
    verbose: bool,
) -> IvfOpqIndex<T>
where
    T: AnnSearchFloat + AddAssign,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    // Note: `IvfOpqIndex::build` takes the iteration count before the
    // centroid count, the reverse of this wrapper's parameter order.
    IvfOpqIndex::build(
        mat,
        nlist,
        m,
        ann_dist,
        max_iters,
        n_opq_iter,
        n_opq_centroids,
        seed,
        verbose,
    )
}
#[cfg(feature = "quantised")]
/// Queries an IVF-OPQ index with every row of `query_mat` in parallel.
///
/// `nprobe` optionally sets how many inverted lists are probed per query.
pub fn query_ivf_opq_index<T>(
    query_mat: MatRef<T>,
    index: &IvfOpqIndex<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + AddAssign,
{
    query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
        index.query_row(query_mat.row(i), k, nprobe)
    })
}
#[cfg(feature = "quantised")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// IVF-OPQ index itself, delegating to `generate_knn`.
pub fn query_ivf_opq_index_self<T>(
    index: &IvfOpqIndex<T>,
    k: usize,
    nprobe: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + AddAssign,
{
    index.generate_knn(k, nprobe, return_dist, verbose)
}
#[cfg(feature = "gpu")]
/// Builds an exhaustive (brute-force) index that executes queries on the
/// given cubecl `device`. Unrecognised `dist_metric` strings fall back to
/// the default distance metric.
pub fn build_exhaustive_index_gpu<T, R>(
    mat: MatRef<T>,
    dist_metric: &str,
    device: R::Device,
) -> ExhaustiveIndexGpu<T, R>
where
    T: AnnSearchGpuFloat + AnnSearchFloat,
    R: Runtime,
{
    let metric = parse_ann_dist(dist_metric).unwrap_or_default();
    ExhaustiveIndexGpu::new(mat, metric, device)
}
#[cfg(feature = "gpu")]
/// Queries a GPU exhaustive index with the whole of `query_mat` as a single
/// batch, returning `k` neighbours per row and, when `return_dist` is true,
/// their distances.
pub fn query_exhaustive_index_gpu<T, R>(
    query_mat: MatRef<T>,
    index: &ExhaustiveIndexGpu<T, R>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchGpuFloat + AnnSearchFloat,
    R: Runtime,
{
    let (indices, distances) = index.query_batch(query_mat, k, verbose);
    // Distances are always computed by the batch kernel; only expose them
    // when the caller asked for them.
    (indices, return_dist.then_some(distances))
}
#[cfg(feature = "gpu")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the GPU
/// exhaustive index itself, delegating to `generate_knn`.
pub fn query_exhaustive_index_gpu_self<T, R>(
    index: &ExhaustiveIndexGpu<T, R>,
    k: usize,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchGpuFloat + AnnSearchFloat,
    R: Runtime,
{
    index.generate_knn(k, return_dist, verbose)
}
#[cfg(feature = "gpu")]
/// Builds an IVF index that executes queries on the given cubecl `device`.
///
/// `nlist` and `max_iters` tune the k-means coarse quantiser; unrecognised
/// `dist_metric` strings fall back to the default distance metric.
pub fn build_ivf_index_gpu<T, R>(
    mat: MatRef<T>,
    nlist: Option<usize>,
    max_iters: Option<usize>,
    dist_metric: &str,
    seed: usize,
    verbose: bool,
    device: R::Device,
) -> IvfIndexGpu<T, R>
where
    R: Runtime,
    T: AnnSearchFloat + AnnSearchGpuFloat,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    IvfIndexGpu::build(mat, ann_dist, nlist, max_iters, seed, verbose, device)
}
#[cfg(feature = "gpu")]
/// Queries a GPU IVF index with the whole of `query_mat` as a single batch.
///
/// `nprobe` sets how many inverted lists are probed and `nquery` the GPU
/// query batch size; `None` values fall back to index defaults. Distances
/// are returned only when `return_dist` is true.
pub fn query_ivf_index_gpu<T, R>(
    query_mat: MatRef<T>,
    index: &IvfIndexGpu<T, R>,
    k: usize,
    nprobe: Option<usize>,
    nquery: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    R: Runtime,
    T: AnnSearchFloat + AnnSearchGpuFloat,
{
    let (indices, distances) = index.query_batch(query_mat, k, nprobe, nquery, verbose);
    // Distances are always produced by the batch query; expose them only on
    // request.
    (indices, return_dist.then_some(distances))
}
#[cfg(feature = "gpu")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the GPU
/// IVF index itself, delegating to `generate_knn`.
pub fn query_ivf_index_gpu_self<T, R>(
    index: &IvfIndexGpu<T, R>,
    k: usize,
    nprobe: Option<usize>,
    nquery: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    R: Runtime,
    T: AnnSearchFloat + AnnSearchGpuFloat,
{
    index.generate_knn(k, nprobe, nquery, return_dist, verbose)
}
#[cfg(feature = "gpu")]
#[allow(clippy::too_many_arguments)]
/// Builds an NN-Descent k-NN graph index on the given cubecl `device`.
///
/// Optional parameters (`k`, `build_k`, `max_iters`, `n_trees`, `delta`,
/// `rho`, `refine_knn`) fall back to `NNDescentGpu` defaults when `None`;
/// `retain_gpu` keeps the built structures resident on the device.
/// Unrecognised `dist_metric` strings fall back to the default metric.
pub fn build_nndescent_index_gpu<T, R>(
    mat: MatRef<T>,
    dist_metric: &str,
    k: Option<usize>,
    build_k: Option<usize>,
    max_iters: Option<usize>,
    n_trees: Option<usize>,
    delta: Option<f32>,
    rho: Option<f32>,
    refine_knn: Option<usize>,
    seed: usize,
    verbose: bool,
    retain_gpu: bool,
    device: R::Device,
) -> NNDescentGpu<T, R>
where
    R: Runtime,
    T: AnnSearchFloat + AnnSearchGpuFloat,
    NNDescentGpu<T, R>: NNDescentQuery<T>,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    NNDescentGpu::build(
        mat, ann_dist, k, build_k, max_iters, n_trees, delta, rho, refine_knn, seed, verbose,
        retain_gpu, device,
    )
}
#[cfg(feature = "gpu")]
/// Queries a GPU NN-Descent index, choosing between a batched GPU path and a
/// per-row CPU beam search.
///
/// The GPU path is taken only for batches of at least 32 queries when no
/// explicit `ef_search` is given (the GPU kernel does not take an ef
/// parameter); otherwise each row is searched on the CPU in parallel.
/// `query_params` is forwarded to the GPU kernel; distances are returned
/// only when `return_dist` is true.
pub fn query_nndescent_index_gpu<T, R>(
    query_mat: MatRef<T>,
    index: &mut NNDescentGpu<T, R>,
    k: usize,
    ef_search: Option<usize>,
    query_params: Option<CagraGpuSearchParams>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    R: Runtime,
    T: AnnSearchFloat + AnnSearchGpuFloat,
    NNDescentGpu<T, R>: NNDescentQuery<T>,
{
    use rayon::prelude::*;
    let n_queries = query_mat.nrows();
    let gpu_batch_threshold = 32;
    if n_queries >= gpu_batch_threshold && ef_search.is_none() {
        if verbose {
            println!(" GPU batch query: {} vectors, k={}...", n_queries, k);
        }
        // Flatten the query matrix row-major for the GPU kernel.
        let queries_flat: Vec<T> = (0..n_queries)
            .flat_map(|i| {
                let row = query_mat.row(i);
                if row.col_stride() == 1 {
                    // SAFETY: unit column stride means the row's `ncols()`
                    // elements are contiguous in memory starting at
                    // `row.as_ptr()`, so this slice view is in-bounds.
                    unsafe { std::slice::from_raw_parts(row.as_ptr(), row.ncols()) }.to_vec()
                } else {
                    // Strided row: fall back to an element-wise copy.
                    row.iter().cloned().collect()
                }
            })
            .collect();
        // NOTE(review): 42 appears to be a fixed search seed here — confirm
        // against `query_batch_gpu`.
        let (indices, distances) =
            index.query_batch_gpu(&queries_flat, n_queries, query_params, k, 42);
        if return_dist {
            (indices, Some(distances))
        } else {
            (indices, None)
        }
    } else {
        if verbose {
            println!(
                " CPU beam search: {} vectors (ef={:?})...",
                n_queries, ef_search
            );
        }
        let results: Vec<(Vec<usize>, Vec<T>)> = (0..n_queries)
            .into_par_iter()
            .map(|i| {
                let row = query_mat.row(i);
                index.query_row(row, k, ef_search)
            })
            .collect();
        if return_dist {
            let (indices, distances) = results.into_iter().unzip();
            (indices, Some(distances))
        } else {
            let indices = results.into_iter().map(|(idx, _)| idx).collect();
            (indices, None)
        }
    }
}
#[cfg(feature = "gpu")]
/// Extracts the k-NN graph already built by the GPU NN-Descent index,
/// without running any new queries.
pub fn extract_nndescent_knn_gpu<T, R>(
    index: &NNDescentGpu<T, R>,
    return_dist: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    R: Runtime,
    T: AnnSearchFloat + AnnSearchGpuFloat,
    NNDescentGpu<T, R>: NNDescentQuery<T>,
{
    index.extract_knn(return_dist)
}
#[cfg(feature = "gpu")]
/// Runs a GPU self-query on the NN-Descent index, returning each stored
/// vector's `k` nearest neighbours and, when `return_dist` is true, their
/// distances. `query_params` is forwarded to the GPU search kernel.
pub fn query_nndescent_index_gpu_self<T, R>(
    index: &mut NNDescentGpu<T, R>,
    k: usize,
    query_params: Option<CagraGpuSearchParams>,
    return_dist: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    R: Runtime,
    T: AnnSearchFloat + AnnSearchGpuFloat,
    NNDescentGpu<T, R>: NNDescentQuery<T>,
{
    let (indices, distances) = index.self_query_gpu(k, query_params, 42);
    // The kernel always produces distances; expose them only on request.
    (indices, return_dist.then_some(distances))
}
#[cfg(feature = "binary")]
/// Builds an exhaustive index over `n_bits`-bit binarised vectors.
///
/// `binary_init` selects the binarisation scheme and `metric` the distance
/// used (falling back to the default when unrecognised). When `save_store`
/// is true the original full-precision vectors are persisted to `save_path`
/// for reranking.
///
/// # Panics
/// Panics if `save_store` is true but `save_path` is `None`.
pub fn build_exhaustive_index_binary<T>(
    mat: MatRef<T>,
    n_bits: usize,
    seed: usize,
    binary_init: &str,
    metric: &str,
    save_store: bool,
    save_path: Option<impl AsRef<Path>>,
) -> std::io::Result<ExhaustiveIndexBinary<T>>
where
    T: AnnSearchFloat + Pod,
{
    let metric = parse_ann_dist(metric).unwrap_or_default();
    if save_store {
        let path = save_path.expect("save_path required when save_store is true");
        ExhaustiveIndexBinary::new_with_vector_store(mat, binary_init, n_bits, metric, seed, path)
    } else {
        // Note: the plain constructor does not take the parsed metric.
        Ok(ExhaustiveIndexBinary::new(mat, binary_init, n_bits, seed))
    }
}
#[cfg(feature = "binary")]
/// Queries a binary exhaustive index with every row of `query_mat` in
/// parallel.
///
/// Three paths are possible: with `rerank` the binary candidates are
/// re-scored against stored full-precision vectors (`rerank_factor` scales
/// the candidate pool); otherwise an asymmetric query is used when the index
/// supports it; failing that, plain binary search whose integer Hamming
/// distances are converted into `T` for a uniform return type.
pub fn query_exhaustive_index_binary<T>(
    query_mat: MatRef<T>,
    index: &ExhaustiveIndexBinary<T>,
    k: usize,
    rerank: bool,
    rerank_factor: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Pod,
{
    if rerank {
        query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
            index.query_row_reranking(query_mat.row(i), k, rerank_factor)
        })
    } else {
        let (indices, dist) = if index.use_asymmetric() {
            query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
                index.query_row_asymmetric(query_mat.row(i), k, rerank_factor)
            })
        } else {
            let (indices, distances_u32) =
                query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
                    index.query_row(query_mat.row(i), k)
                });
            // Symmetric binary search yields u32 Hamming distances; convert
            // them to T so all paths share one return type.
            let distances_t = distances_u32.map(|dists| {
                dists
                    .into_iter()
                    .map(|v| v.into_iter().map(|d| T::from_u32(d).unwrap()).collect())
                    .collect()
            });
            (indices, distances_t)
        };
        (indices, dist)
    }
}
#[cfg(feature = "binary")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// binary exhaustive index itself, delegating to `generate_knn`.
pub fn query_exhaustive_index_binary_self<T>(
    index: &ExhaustiveIndexBinary<T>,
    k: usize,
    rerank_factor: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Pod,
{
    index.generate_knn(k, rerank_factor, return_dist, verbose)
}
#[cfg(feature = "binary")]
#[allow(clippy::too_many_arguments)]
/// Builds an IVF index over `n_bits`-bit binarised vectors.
///
/// `binarisation_init` selects the binarisation scheme; `nlist`/`max_iters`
/// tune the coarse quantiser. When `save_store` is true the full-precision
/// vectors are persisted to `save_path` for reranking.
///
/// # Panics
/// Panics if `save_store` is true but `save_path` is `None`.
pub fn build_ivf_index_binary<T>(
    mat: MatRef<T>,
    binarisation_init: &str,
    n_bits: usize,
    nlist: Option<usize>,
    max_iters: Option<usize>,
    dist_metric: &str,
    seed: usize,
    save_store: bool,
    save_path: Option<impl AsRef<Path>>,
    verbose: bool,
) -> std::io::Result<IvfIndexBinary<T>>
where
    T: AnnSearchFloat + Pod,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    if save_store {
        let path = save_path.expect("save_path required when save_store is true");
        IvfIndexBinary::build_with_vector_store(
            mat,
            binarisation_init,
            n_bits,
            ann_dist,
            nlist,
            max_iters,
            seed,
            verbose,
            path,
        )
    } else {
        Ok(IvfIndexBinary::build(
            mat,
            binarisation_init,
            n_bits,
            ann_dist,
            nlist,
            max_iters,
            seed,
            verbose,
        ))
    }
}
#[cfg(feature = "binary")]
#[allow(clippy::too_many_arguments)]
/// Queries a binary IVF index with every row of `query_mat` in parallel.
///
/// Mirrors [`query_exhaustive_index_binary`]: reranking against stored
/// full-precision vectors, an asymmetric path when supported, or plain
/// binary search whose u32 Hamming distances are converted to `T`.
/// `nprobe` sets how many inverted lists are probed per query.
pub fn query_ivf_index_binary<T>(
    query_mat: MatRef<T>,
    index: &IvfIndexBinary<T>,
    k: usize,
    nprobe: Option<usize>,
    rerank: bool,
    rerank_factor: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Pod,
{
    if rerank {
        query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
            index.query_row_reranking(query_mat.row(i), k, nprobe, rerank_factor)
        })
    } else {
        let (indices, dist) = if index.use_asymmetric() {
            query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
                index.query_row_asymmetric(query_mat.row(i), k, nprobe, rerank_factor)
            })
        } else {
            let (indices, distances_u32) =
                query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
                    index.query_row(query_mat.row(i), k, nprobe)
                });
            // Convert integer Hamming distances to T for a uniform return
            // type across all three paths.
            let distances_t = distances_u32.map(|dists| {
                dists
                    .into_iter()
                    .map(|v| v.into_iter().map(|d| T::from_u32(d).unwrap()).collect())
                    .collect()
            });
            (indices, distances_t)
        };
        (indices, dist)
    }
}
#[cfg(feature = "binary")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// binary IVF index itself, delegating to `generate_knn`.
pub fn query_ivf_index_binary_self<T>(
    index: &IvfIndexBinary<T>,
    k: usize,
    nprobe: Option<usize>,
    rerank_factor: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Pod,
{
    index.generate_knn(k, nprobe, rerank_factor, return_dist, verbose)
}
#[cfg(feature = "binary")]
/// Builds an exhaustive RaBitQ-quantised index over the rows of `mat`.
///
/// `n_clust_rabitq` optionally sets the number of RaBitQ clusters. When
/// `save_store` is true the full-precision vectors are persisted to
/// `save_path` for reranking.
///
/// # Panics
/// Panics if `save_store` is true but `save_path` is `None`.
pub fn build_exhaustive_index_rabitq<T>(
    mat: MatRef<T>,
    n_clust_rabitq: Option<usize>,
    dist_metric: &str,
    seed: usize,
    save_store: bool,
    save_path: Option<impl AsRef<Path>>,
) -> std::io::Result<ExhaustiveIndexRaBitQ<T>>
where
    T: AnnSearchFloat + Pod,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    if save_store {
        let path = save_path.expect("save_path required when save_store is true");
        ExhaustiveIndexRaBitQ::new_with_vector_store(mat, &ann_dist, n_clust_rabitq, seed, path)
    } else {
        Ok(ExhaustiveIndexRaBitQ::new(
            mat,
            &ann_dist,
            n_clust_rabitq,
            seed,
        ))
    }
}
#[cfg(feature = "binary")]
#[allow(clippy::too_many_arguments)]
/// Queries an exhaustive RaBitQ index with every row of `query_mat` in
/// parallel.
///
/// With `rerank`, candidates are re-scored against stored full-precision
/// vectors (`rerank_factor` scales the candidate pool); otherwise the
/// quantised estimates are returned directly. `n_probe` optionally limits
/// how many RaBitQ clusters are probed.
pub fn query_exhaustive_index_rabitq<T>(
    query_mat: MatRef<T>,
    index: &ExhaustiveIndexRaBitQ<T>,
    k: usize,
    n_probe: Option<usize>,
    rerank: bool,
    rerank_factor: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Pod,
{
    if rerank {
        query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
            index.query_row_reranking(query_mat.row(i), k, n_probe, rerank_factor)
        })
    } else {
        query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
            index.query_row(query_mat.row(i), k, n_probe)
        })
    }
}
#[cfg(feature = "binary")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// RaBitQ exhaustive index itself, delegating to `generate_knn`.
pub fn query_exhaustive_index_rabitq_self<T>(
    index: &ExhaustiveIndexRaBitQ<T>,
    k: usize,
    n_probe: Option<usize>,
    rerank_factor: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Pod,
{
    index.generate_knn(k, n_probe, rerank_factor, return_dist, verbose)
}
#[cfg(feature = "binary")]
#[allow(clippy::too_many_arguments)]
/// Builds an IVF RaBitQ-quantised index over the rows of `mat`.
///
/// `nlist`/`max_iters` tune the coarse quantiser. When `save_store` is true
/// the full-precision vectors are persisted to `save_path` for reranking.
///
/// # Panics
/// Panics if `save_store` is true but `save_path` is `None`.
pub fn build_ivf_index_rabitq<T>(
    mat: MatRef<T>,
    nlist: Option<usize>,
    max_iters: Option<usize>,
    dist_metric: &str,
    seed: usize,
    save_store: bool,
    save_path: Option<impl AsRef<Path>>,
    verbose: bool,
) -> std::io::Result<IvfIndexRaBitQ<T>>
where
    T: AnnSearchFloat + Pod,
{
    let ann_dist = parse_ann_dist(dist_metric).unwrap_or_default();
    if save_store {
        let path = save_path.expect("save_path required when save_store is true");
        IvfIndexRaBitQ::build_with_vector_store(
            mat, ann_dist, nlist, max_iters, seed, verbose, path,
        )
    } else {
        Ok(IvfIndexRaBitQ::build(
            mat, ann_dist, nlist, max_iters, seed, verbose,
        ))
    }
}
#[cfg(feature = "binary")]
#[allow(clippy::too_many_arguments)]
/// Queries an IVF RaBitQ index with every row of `query_mat` in parallel.
///
/// With `rerank`, candidates are re-scored against stored full-precision
/// vectors (`rerank_factor` scales the candidate pool); otherwise quantised
/// estimates are returned directly. `nprobe` sets how many inverted lists
/// are probed per query.
pub fn query_ivf_index_rabitq<T>(
    query_mat: MatRef<T>,
    index: &IvfIndexRaBitQ<T>,
    k: usize,
    nprobe: Option<usize>,
    rerank: bool,
    rerank_factor: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Pod,
{
    if rerank {
        query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
            index.query_row_reranking(query_mat.row(i), k, nprobe, rerank_factor)
        })
    } else {
        query_parallel(query_mat.nrows(), return_dist, verbose, |i| {
            index.query_row(query_mat.row(i), k, nprobe)
        })
    }
}
#[cfg(feature = "binary")]
/// Computes the k-nearest-neighbour graph of the vectors stored in the
/// RaBitQ IVF index itself, delegating to `generate_knn`.
pub fn query_ivf_index_rabitq_self<T>(
    index: &IvfIndexRaBitQ<T>,
    k: usize,
    nprobe: Option<usize>,
    rerank_factor: Option<usize>,
    return_dist: bool,
    verbose: bool,
) -> (Vec<Vec<usize>>, Option<Vec<Vec<T>>>)
where
    T: AnnSearchFloat + Pod,
{
    index.generate_knn(k, nprobe, rerank_factor, return_dist, verbose)
}