#![allow(clippy::expect_used, clippy::unwrap_used)]
#[path = "common/mod.rs"]
mod common;
use std::collections::HashSet;
use std::fs::{File, OpenOptions};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::time::Instant;
/// Run-time settings for the benchmark, filled in by `parse_args`.
struct Config {
/// Dataset directory (the positional command-line argument).
data_dir: String,
/// Lower-cased algorithm names collected from `--algo` flags.
algos: Vec<String>,
/// Value of `--m`; the HNSW and NSW runners pass it to their index parameters.
m: usize,
/// Value of `--ef-construction`; used by the HNSW-based runners.
ef_construction: usize,
/// Values parsed from the comma-separated `--ef-search` list; runners benchmark each in turn.
ef_search_values: Vec<usize>,
/// Set by `--json`: runners emit one JSON line per measurement instead of a text table.
json: bool,
/// File that `emit_result` appends JSON lines to.
results_path: PathBuf,
/// True when `data_dir` contains "euclidean"; HNSW-based runners then use L2 instead of cosine.
is_euclidean: bool,
}
impl Default for Config {
fn default() -> Self {
Self {
data_dir: "data/ann-benchmarks/glove-25-angular".into(),
algos: vec!["hnsw".into()],
m: 16,
ef_construction: 200,
ef_search_values: vec![10, 20, 50, 100, 200, 400],
json: false,
results_path: PathBuf::new(), is_euclidean: false, }
}
}
/// Derives the default results file for a dataset directory.
///
/// For `<parent>/<dataset>` the result is
/// `<parent>/results/<dataset>-all-algos.jsonl`. The `results` directory is
/// created on a best-effort basis; a failure to create it is ignored here.
fn default_results_path(data_dir: &str) -> PathBuf {
    let data_path = Path::new(data_dir);

    // Last path component names the dataset; empty when there is none.
    let dataset = match data_path.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => String::new(),
    };

    // Results live next to the dataset directory; fall back to the current
    // directory when the path has no parent.
    let results_dir = match data_path.parent() {
        Some(parent) => parent.join("results"),
        None => Path::new(".").join("results"),
    };
    let _ = std::fs::create_dir_all(&results_dir);

    let file_name = format!("{}-all-algos.jsonl", dataset);
    results_dir.join(file_name)
}
/// Collects the algorithm names already present in a results file.
///
/// Each line is scanned for the first `"algorithm":"<name>"` occurrence and
/// `<name>` is added to the set. A file that cannot be opened yields an empty
/// set; lines that cannot be read or that lack the key are skipped.
fn load_completed_algos(path: &Path) -> HashSet<String> {
    const KEY: &str = "\"algorithm\":\"";

    let file = match File::open(path) {
        Ok(handle) => handle,
        Err(_) => return HashSet::new(),
    };

    BufReader::new(file)
        .lines()
        .filter_map(|read| read.ok())
        .filter_map(|text| {
            let value_start = text.find(KEY)? + KEY.len();
            let tail = &text[value_start..];
            let value_end = tail.find('"')?;
            Some(tail[..value_end].to_string())
        })
        .collect()
}
/// Prints one result line to stdout and appends it to the results file.
///
/// The file is created if missing and always opened in append mode. Writing
/// is still best-effort (a failure never aborts the benchmark), but a failed
/// open or write is now reported on stderr instead of being dropped silently,
/// so lost results are visible to the operator.
fn emit_result(results_path: &Path, line: &str) {
    println!("{}", line);

    let appended = OpenOptions::new()
        .create(true)
        .append(true)
        .open(results_path)
        .and_then(|mut file| writeln!(file, "{}", line));

    if let Err(err) = appended {
        eprintln!(
            "warning: could not append result to {}: {}",
            results_path.display(),
            err
        );
    }
}
/// Builds the benchmark [`Config`] from the process command line.
///
/// Recognised arguments:
/// - `--algo NAME` (repeatable): the first occurrence replaces the default
///   algorithm list, later ones append; names are lower-cased.
/// - `--m N`, `--ef-construction N`: an unparsable value falls back to 16 / 200.
/// - `--ef-search A,B,...`: comma-separated list; unparsable entries are dropped.
/// - `--json`: emit JSON lines instead of the text table.
/// - `--results PATH`: write results to `PATH` instead of the default location.
/// - `--fresh`: delete the results file before the run.
/// - any argument not starting with `--`: the dataset directory.
///
/// Unknown `--` flags are reported on stderr and ignored. A flag that expects
/// a value but is the last argument is ignored.
///
/// `--fresh` is tracked as a boolean rather than a sentinel path stored in
/// `results_path`, so it combines with `--results` in either order: the file
/// that will actually be written is the one that gets removed.
fn parse_args() -> Config {
    let mut cfg = Config::default();
    let mut algos_set = false;
    let mut results_override = false;
    let mut fresh = false;

    let mut args = std::env::args().skip(1);
    while let Some(arg) = args.next() {
        match arg.as_str() {
            "--algo" => {
                if let Some(name) = args.next() {
                    // Only drop the default list once a real value is supplied.
                    if !algos_set {
                        cfg.algos.clear();
                        algos_set = true;
                    }
                    cfg.algos.push(name.to_lowercase());
                }
            }
            "--m" => {
                if let Some(value) = args.next() {
                    cfg.m = value.parse().unwrap_or(16);
                }
            }
            "--ef-construction" => {
                if let Some(value) = args.next() {
                    cfg.ef_construction = value.parse().unwrap_or(200);
                }
            }
            "--ef-search" => {
                if let Some(value) = args.next() {
                    cfg.ef_search_values = value
                        .split(',')
                        .filter_map(|s| s.trim().parse().ok())
                        .collect();
                }
            }
            "--json" => {
                cfg.json = true;
            }
            "--results" => {
                if let Some(value) = args.next() {
                    cfg.results_path = PathBuf::from(value);
                    results_override = true;
                }
            }
            "--fresh" => {
                fresh = true;
            }
            positional if !positional.starts_with("--") => {
                cfg.data_dir = positional.to_string();
            }
            unknown => {
                eprintln!("Unknown flag: {}", unknown);
            }
        }
    }

    // The default path depends on the dataset directory, which may appear
    // anywhere on the command line, so resolve it only after parsing.
    if !results_override {
        cfg.results_path = default_results_path(&cfg.data_dir);
    }
    if fresh {
        // Best-effort: the file may simply not exist yet.
        std::fs::remove_file(&cfg.results_path).ok();
    }

    cfg.is_euclidean = cfg.data_dir.contains("euclidean");
    cfg
}
/// Aggregate metrics produced by `evaluate` for one parameter setting.
struct BenchResult {
/// Mean over all queries of (ground-truth top-k ids found in the results) / k.
recall_at_k: f64,
/// Query count divided by the summed per-query latency, in queries per second.
qps: f64,
/// Mean per-query latency in microseconds.
latency_us: f64,
/// Latency in microseconds at index n/2 of the sorted per-query latencies.
p50_us: f64,
/// Latency in microseconds at index floor(0.95 * n) of the sorted per-query latencies.
p95_us: f64,
/// Latency in microseconds at index floor(0.99 * n) of the sorted per-query latencies.
p99_us: f64,
}
fn json_line(
algorithm: &str,
params: &str,
build_time_s: f64,
rss_kb: Option<u64>,
result: &BenchResult,
) -> String {
let mut s = format!(
"{{\"algorithm\":\"{}\",\"params\":{},\"recall_at_10\":{:.4},\"qps\":{:.1},\"build_time_s\":{:.2},\"latency_us\":{:.1},\"p50_us\":{:.1},\"p95_us\":{:.1},\"p99_us\":{:.1}",
algorithm, params, result.recall_at_k, result.qps, build_time_s, result.latency_us,
result.p50_us, result.p95_us, result.p99_us
);
if let Some(kb) = rss_kb {
s.push_str(&format!(",\"rss_kb\":{}", kb));
}
s.push('}');
s
}
/// Number of untimed queries issued before measurement starts.
const WARMUP_QUERIES: usize = 50;

/// Runs every query in `test` through `search_fn` and aggregates recall and
/// latency statistics.
///
/// * `search_fn` - called as `search_fn(query, k)`; returns `(id, distance)` pairs.
/// * `test` - query vectors.
/// * `neighbors` - ground-truth ids per query; only the first `k` of each row
///   are used. Must have at least `test.len()` rows (panics otherwise).
/// * `k` - number of neighbours requested and scored.
///
/// The first `WARMUP_QUERIES` queries are executed once untimed, then every
/// query is timed individually. QPS is derived from the sum of those
/// per-query latencies, not from wall-clock time around the loop.
///
/// An empty `test` set yields an all-zero result instead of panicking, and
/// `k == 0` yields a recall of 0 instead of NaN.
fn evaluate(
    search_fn: &dyn Fn(&[f32], usize) -> Vec<(u32, f32)>,
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    k: usize,
) -> BenchResult {
    if test.is_empty() {
        return BenchResult {
            recall_at_k: 0.0,
            qps: 0.0,
            latency_us: 0.0,
            p50_us: 0.0,
            p95_us: 0.0,
            p99_us: 0.0,
        };
    }

    for query in test.iter().take(WARMUP_QUERIES) {
        let _ = search_fn(query, k);
    }

    // Avoid 0/0 when k == 0; the intersection is empty in that case anyway.
    let recall_denominator = k.max(1) as f64;
    let mut total_recall = 0.0;
    let mut latencies_us: Vec<f64> = Vec::with_capacity(test.len());
    for (i, query) in test.iter().enumerate() {
        let q_start = Instant::now();
        let results = search_fn(query, k);
        let q_elapsed = q_start.elapsed();
        latencies_us.push(q_elapsed.as_nanos() as f64 / 1000.0);

        // Negative ground-truth ids cannot name a vector; drop them rather
        // than letting them wrap around to large u32 values.
        let gt_set: HashSet<u32> = neighbors[i]
            .iter()
            .take(k)
            .filter_map(|&n| u32::try_from(n).ok())
            .collect();
        let found: HashSet<u32> = results.iter().map(|r| r.0).collect();
        total_recall += gt_set.intersection(&found).count() as f64 / recall_denominator;
    }

    latencies_us.sort_unstable_by(|a, b| a.total_cmp(b));
    let n = latencies_us.len();
    let total_us: f64 = latencies_us.iter().sum();
    // Index floor(p * n) into the sorted latencies, clamped to the last element.
    let percentile = |p: f64| latencies_us[((n as f64 * p) as usize).min(n - 1)];

    BenchResult {
        recall_at_k: total_recall / n as f64,
        qps: n as f64 / (total_us / 1_000_000.0),
        latency_us: total_us / n as f64,
        p50_us: latencies_us[n / 2],
        p95_us: percentile(0.95),
        p99_us: percentile(0.99),
    }
}
/// Reports the resident set size of the current process in kilobytes.
///
/// On macOS the value comes from `ps -o rss= -p <pid>`; on Linux from the
/// `VmRSS:` line of `/proc/self/status`. Returns `None` on other platforms
/// or whenever the value cannot be obtained or parsed.
fn current_rss_kb() -> Option<u64> {
    #[cfg(target_os = "macos")]
    {
        let pid = std::process::id().to_string();
        let ps = std::process::Command::new("ps")
            .args(["-o", "rss=", "-p", &pid])
            .output()
            .ok()?;
        let text = String::from_utf8_lossy(&ps.stdout);
        text.trim().parse::<u64>().ok()
    }
    #[cfg(target_os = "linux")]
    {
        let status = std::fs::read_to_string("/proc/self/status").ok()?;
        // Only the first `VmRSS:` line is considered.
        status
            .lines()
            .find_map(|line| line.strip_prefix("VmRSS:"))
            .and_then(|rest| {
                rest.trim()
                    .trim_end_matches(" kB")
                    .trim()
                    .parse::<u64>()
                    .ok()
            })
    }
    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
    {
        None
    }
}
/// Exhaustive nearest-neighbour search over `train`.
///
/// Computes `metric.distance(query, v)` for every training vector, sorts
/// ascending by that distance, and returns the first `k` `(index, distance)`
/// pairs. Indices are positions in `train`, cast to `u32`.
fn brute_force_search(
    train: &[Vec<f32>],
    query: &[f32],
    k: usize,
    metric: vicinity::DistanceMetric,
) -> Vec<(u32, f32)> {
    let mut scored: Vec<(u32, f32)> = Vec::with_capacity(train.len());
    for (idx, candidate) in train.iter().enumerate() {
        scored.push((idx as u32, metric.distance(query, candidate)));
    }
    scored.sort_unstable_by(|lhs, rhs| lhs.1.total_cmp(&rhs.1));
    scored.truncate(k);
    scored
}
/// Prints the column titles of the text results table followed by a
/// 65-character dashed rule.
fn print_header() {
    let titles = format!(
        "{:>10} {:>10} {:>10} {:>10} {:>10} {:>10}",
        "param", "Recall@10", "QPS", "p50(us)", "p95(us)", "p99(us)"
    );
    let rule = "-".repeat(65);
    println!("{}", titles);
    println!("{}", rule);
}
fn print_row(param_label: &str, result: &BenchResult) {
println!(
"{:>10} {:>9.1}% {:>9.0} {:>9.0} {:>9.0} {:>9.0}",
param_label,
result.recall_at_k * 100.0,
result.qps,
result.p50_us,
result.p95_us,
result.p99_us
);
}
/// Builds an HNSW index over `train` and evaluates it once per
/// `cfg.ef_search_values` entry with k = 10.
///
/// The metric is L2 when `cfg.is_euclidean` is set, otherwise cosine with
/// `auto_normalize` enabled. Output is a text table, or JSON lines via
/// `emit_result` when `cfg.json` is set. Panics if index construction,
/// insertion, build, or a search returns an error.
#[cfg(feature = "hnsw")]
fn run_hnsw(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::hnsw::{HNSWIndex, HNSWParams};

    let verbose = !cfg.json;
    let metric = match cfg.is_euclidean {
        true => vicinity::DistanceMetric::L2,
        false => vicinity::DistanceMetric::Cosine,
    };
    let params = HNSWParams {
        m: cfg.m,
        m_max: cfg.m,
        ef_construction: cfg.ef_construction,
        metric,
        auto_normalize: !cfg.is_euclidean,
        ..Default::default()
    };
    if verbose {
        println!(
            "--- HNSW (M={}, ef_construction={}, metric={:?}) ---",
            cfg.m, cfg.ef_construction, metric
        );
    }

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = HNSWIndex::with_params(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let search = |q: &[f32], k: usize| index.search(q, k, ef).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!(
                "{{\"m\":{},\"ef_construction\":{},\"ef_search\":{}}}",
                cfg.m, cfg.ef_construction, ef
            );
            let line = json_line("hnsw", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds an NSW index over `train` (both constructor parameters set to
/// `cfg.m`) and evaluates it once per `cfg.ef_search_values` entry with k = 10.
///
/// Output is a text table, or JSON lines when `cfg.json` is set. Panics if
/// the index returns an error at any step.
#[cfg(feature = "nsw")]
fn run_nsw(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::nsw::NSWIndex;

    let verbose = !cfg.json;
    if verbose {
        println!("--- NSW (M={}) ---", cfg.m);
    }

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = NSWIndex::new(dim, cfg.m, cfg.m).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let search = |q: &[f32], k: usize| index.search(q, k, ef).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!("{{\"m\":{},\"ef_search\":{}}}", cfg.m, ef);
            let line = json_line("nsw", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds an IVF-PQ index (256 clusters, codebook size 256) over `train` and
/// evaluates it for a fixed sweep of `nprobe` values with k = 10.
///
/// The number of codebooks is the largest value in `1..=min(8, dim)` that
/// divides `dim`, falling back to 1. Output is a text table, or JSON lines
/// when `cfg.json` is set. Panics if the index returns an error at any step.
#[cfg(feature = "ivf_pq")]
fn run_ivfpq(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::ivf_pq::{IVFPQIndex, IVFPQParams};

    let verbose = !cfg.json;
    let num_clusters = 256;
    let num_codebooks = (1..=8.min(dim))
        .rev()
        .find(|&candidate| dim.is_multiple_of(candidate))
        .unwrap_or(1);
    if verbose {
        println!(
            "--- IVF-PQ (clusters={}, codebooks={}) ---",
            num_clusters, num_codebooks
        );
    }
    let params = IVFPQParams {
        num_clusters,
        num_codebooks,
        codebook_size: 256,
        nprobe: 1,
        ..Default::default()
    };

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = IVFPQIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    let nprobe_values = [1, 2, 5, 10, 20, 50, 100];
    for &nprobe in &nprobe_values {
        // Probing more clusters than exist is skipped.
        if nprobe > num_clusters {
            continue;
        }
        index.set_nprobe(nprobe);
        let search = |q: &[f32], k: usize| index.search(q, k).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("np={}", nprobe);
            print_row(&label, &result);
        } else {
            let params_json = format!(
                "{{\"num_clusters\":{},\"num_codebooks\":{},\"nprobe\":{}}}",
                num_clusters, num_codebooks, nprobe
            );
            let line = json_line("ivfpq", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds an EMG index with fixed parameters (`max_degree = 32`) over `train`
/// and evaluates it once per `cfg.ef_search_values` entry with k = 10.
///
/// Output is a text table, or JSON lines when `cfg.json` is set. Panics if
/// the index returns an error at any step.
#[cfg(feature = "emg")]
fn run_emg(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::emg::{EmgIndex, EmgParams};

    let verbose = !cfg.json;
    let params = EmgParams {
        max_degree: 32,
        candidate_size: 64,
        scale_t: 32,
        iterations: 2,
        alpha: 1.5,
        ef_search: 100,
    };
    if verbose {
        println!("--- EMG (max_degree=32) ---");
    }

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = EmgIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let search = |q: &[f32], k: usize| index.search_with_ef(q, k, ef).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!("{{\"max_degree\":32,\"ef_search\":{}}}", ef);
            let line = json_line("emg", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds an NSG index with default parameters over at most the first
/// 50,000 training vectors and evaluates it once per `cfg.ef_search_values`
/// entry with k = 10.
///
/// A notice is written to stderr when the training set is truncated. Output
/// is a text table, or JSON lines when `cfg.json` is set. Panics if the index
/// returns an error at any step.
///
/// NOTE(review): `neighbors` is used unchanged after `train` is truncated; if
/// it was computed against the full training set, recall reported here is not
/// comparable with uncapped runs. Confirm with the caller.
/// NOTE(review): the labels report `max_degree=32` while the index uses
/// `NsgParams::default()`; presumably the default is 32, verify.
#[cfg(feature = "nsg")]
fn run_nsg(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::nsg::{NsgIndex, NsgParams};

    let verbose = !cfg.json;
    let total = train.len();
    let n = total.min(50_000);
    if total > 50_000 {
        eprintln!(
            "NSG: capping at 50,000 vectors (got {}); O(n^2) construction",
            total
        );
    }
    let train = &train[..n];
    if verbose {
        println!("--- NSG (max_degree=32, n={}) ---", n);
    }
    let params = NsgParams::default();

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = NsgIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let search = |q: &[f32], k: usize| index.search_with_ef(q, k, ef).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!("{{\"max_degree\":32,\"ef_search\":{}}}", ef);
            let line = json_line("nsg", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds a PiPNN index with fixed parameters (`max_degree = 32`,
/// `max_leaf_size = 2048`) over `train` and evaluates it once per
/// `cfg.ef_search_values` entry with k = 10.
///
/// Output is a text table, or JSON lines when `cfg.json` is set. Panics if
/// the index returns an error at any step.
#[cfg(feature = "pipnn")]
fn run_pipnn(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::pipnn::{PipnnIndex, PipnnParams};

    let verbose = !cfg.json;
    let params = PipnnParams {
        max_leaf_size: 2048,
        max_degree: 32,
        num_hash_bits: 12,
        final_prune: true,
        alpha: 1.2,
        ef_search: 100,
        ..Default::default()
    };
    if verbose {
        println!("--- PiPNN (max_degree=32, max_leaf_size=2048) ---");
    }

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = PipnnIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let search = |q: &[f32], k: usize| index.search_with_ef(q, k, ef).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!(
                "{{\"max_degree\":32,\"max_leaf_size\":2048,\"ef_search\":{}}}",
                ef
            );
            let line = json_line("pipnn", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds an SNG index with default parameters over at most the first 50,000
/// training vectors and evaluates it once with k = 10.
///
/// A notice is written to stderr when the training set is truncated. Output
/// is a single table row, or one JSON line when `cfg.json` is set. Panics if
/// the index returns an error at any step.
///
/// NOTE(review): `neighbors` is used unchanged after `train` is truncated; if
/// it was computed against the full training set, recall reported here is not
/// comparable with uncapped runs. Confirm with the caller.
#[cfg(feature = "sng")]
fn run_sng(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::sng::SNGIndex;
    use vicinity::sng::SNGParams;

    let verbose = !cfg.json;
    let total = train.len();
    let n = total.min(50_000);
    if total > 50_000 {
        eprintln!(
            "SNG: capping at 50,000 vectors (got {}); O(n^2) construction",
            total
        );
    }
    let train = &train[..n];
    if verbose {
        println!("--- SNG (n={}) ---", n);
    }
    let params = SNGParams::default();

    // Timed section: index creation, insertion of every vector (each one
    // cloned, as `add` takes ownership), and build.
    let timer = Instant::now();
    let mut index = SNGIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add(id as u32, vector.clone()).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    let search = |q: &[f32], k: usize| index.search(q, k).unwrap();
    let result = evaluate(&search, test, neighbors, 10);
    if verbose {
        print_row("--", &result);
        println!();
    } else {
        let params_json = "{}";
        let line = json_line("sng", params_json, build_time_s, rss, &result);
        emit_result(&cfg.results_path, &line);
    }
}
/// Builds a Vamana index with default parameters over `train` and evaluates
/// it once per `cfg.ef_search_values` entry with k = 10.
///
/// Output is a text table, or JSON lines when `cfg.json` is set. Panics if
/// the index returns an error at any step.
#[cfg(feature = "vamana")]
fn run_vamana(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::vamana::VamanaIndex;
    use vicinity::vamana::VamanaParams;

    let verbose = !cfg.json;
    if verbose {
        println!("--- Vamana ---");
    }
    let params = VamanaParams::default();

    // Timed section: index creation, insertion of every vector (each one
    // cloned, as `add` takes ownership), and build.
    let timer = Instant::now();
    let mut index = VamanaIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add(id as u32, vector.clone()).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let search = |q: &[f32], k: usize| index.search(q, k, ef).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!("{{\"ef_search\":{}}}", ef);
            let line = json_line("vamana", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds an IVF-RaBitQ index (256 clusters, `total_bits = 4`, seed 42) over
/// `train` and evaluates it for a fixed sweep of `nprobe` values with k = 10.
///
/// Output is a text table, or JSON lines when `cfg.json` is set. Panics if
/// the index returns an error at any step.
#[cfg(feature = "ivf_rabitq")]
fn run_ivf_rabitq(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::ivf_rabitq::{IVFRaBitQIndex, IVFRaBitQParams};

    let verbose = !cfg.json;
    let num_clusters = 256;
    if verbose {
        println!(
            "--- IVF-RaBitQ (clusters={}, total_bits=4) ---",
            num_clusters
        );
    }
    let params = IVFRaBitQParams {
        num_clusters,
        nprobe: 10,
        total_bits: 4,
        seed: 42,
    };

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = IVFRaBitQIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    let nprobe_values = [1, 2, 5, 10, 20, 50, 100];
    for &nprobe in &nprobe_values {
        // Probing more clusters than exist is skipped.
        if nprobe > num_clusters {
            continue;
        }
        index.set_nprobe(nprobe);
        let search = |q: &[f32], k: usize| index.search(q, k).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("np={}", nprobe);
            print_row(&label, &result);
        } else {
            let params_json = format!(
                "{{\"num_clusters\":{},\"total_bits\":4,\"nprobe\":{}}}",
                num_clusters, nprobe
            );
            let line = json_line("ivf_rabitq", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds a FINGER index with fixed parameters (`max_degree = 32`) over at
/// most the first 50,000 training vectors and evaluates it once per
/// `cfg.ef_search_values` entry with k = 10.
///
/// A notice is written to stderr when the training set is truncated. Output
/// is a text table, or JSON lines when `cfg.json` is set. Panics if the index
/// returns an error at any step.
///
/// NOTE(review): `neighbors` is used unchanged after `train` is truncated; if
/// it was computed against the full training set, recall reported here is not
/// comparable with uncapped runs. Confirm with the caller.
#[cfg(feature = "finger")]
fn run_finger(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::finger::{FingerIndex, FingerParams};

    let verbose = !cfg.json;
    let total = train.len();
    let n = total.min(50_000);
    if total > 50_000 {
        eprintln!(
            "FINGER: capping at 50,000 vectors (got {}); construction is expensive",
            total
        );
    }
    let train = &train[..n];
    let params = FingerParams {
        max_degree: 32,
        ef_construction: 200,
        ef_search: 100,
        alpha: 1.2,
    };
    if verbose {
        println!("--- FINGER (max_degree=32, n={}) ---", n);
    }

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = FingerIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let search = |q: &[f32], k: usize| index.search_with_ef(q, k, ef).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!("{{\"max_degree\":32,\"ef_search\":{}}}", ef);
            let line = json_line("finger", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds a FreshGraph index with fixed parameters (`max_degree = 32`) over
/// at most the first 50,000 training vectors and evaluates it once per
/// `cfg.ef_search_values` entry with k = 10.
///
/// A notice is written to stderr when the training set is truncated. Output
/// is a text table, or JSON lines when `cfg.json` is set. Panics if the index
/// returns an error at any step.
///
/// NOTE(review): `neighbors` is used unchanged after `train` is truncated; if
/// it was computed against the full training set, recall reported here is not
/// comparable with uncapped runs. Confirm with the caller.
#[cfg(feature = "fresh_graph")]
fn run_fresh_graph(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::fresh_graph::{FreshGraphIndex, FreshGraphParams};

    let verbose = !cfg.json;
    let total = train.len();
    let n = total.min(50_000);
    if total > 50_000 {
        eprintln!(
            "FreshGraph: capping at 50,000 vectors (got {}); construction is expensive",
            total
        );
    }
    let train = &train[..n];
    let params = FreshGraphParams {
        max_degree: 32,
        ef_construction: 200,
        ef_search: 100,
        alpha: 1.2,
    };
    if verbose {
        println!("--- FreshGraph (max_degree=32, n={}) ---", n);
    }

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = FreshGraphIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let search = |q: &[f32], k: usize| index.search_with_ef(q, k, ef).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!("{{\"max_degree\":32,\"ef_search\":{}}}", ef);
            let line = json_line("fresh_graph", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds a FilteredGraph index with fixed parameters (`max_degree = 32`)
/// over at most the first 50,000 training vectors, attaching an empty
/// metadata map to every vector, and evaluates it once per
/// `cfg.ef_search_values` entry with k = 10.
///
/// A notice is written to stderr when the training set is truncated. Output
/// is a text table, or JSON lines when `cfg.json` is set. Panics if the index
/// returns an error at any step.
///
/// NOTE(review): `neighbors` is used unchanged after `train` is truncated; if
/// it was computed against the full training set, recall reported here is not
/// comparable with uncapped runs. Confirm with the caller.
#[cfg(feature = "filtered_graph")]
fn run_filtered_graph(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use std::collections::HashMap;
    use vicinity::filtered_graph::{FilteredGraphIndex, FilteredGraphParams};

    let verbose = !cfg.json;
    let total = train.len();
    let n = total.min(50_000);
    if total > 50_000 {
        eprintln!(
            "FilteredGraph: capping at 50,000 vectors (got {}); construction is expensive",
            total
        );
    }
    let train = &train[..n];
    let params = FilteredGraphParams {
        max_degree: 32,
        ef_construction: 200,
        ef_search: 100,
        alpha: 1.2,
    };
    if verbose {
        println!("--- FilteredGraph (max_degree=32, unfiltered, n={}) ---", n);
    }

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = FilteredGraphIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector, HashMap::new()).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let search = |q: &[f32], k: usize| index.search_with_ef(q, k, ef).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!("{{\"max_degree\":32,\"ef_search\":{}}}", ef);
            let line = json_line("filtered_graph", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Placeholder runner for the sparse MIPS index: performs no benchmarking
/// and only writes a notice to stderr explaining why it is skipped. All
/// arguments are ignored.
#[cfg(feature = "sparse_mips")]
fn run_sparse_mips(
    _cfg: &Config,
    _train: &[Vec<f32>],
    _test: &[Vec<f32>],
    _neighbors: &[Vec<i32>],
) {
    let notice = "sparse_mips: skipped -- index requires sparse vectors (SparseVector); \
                  the dense benchmark dataset (f32 slices) is incompatible. \
                  Use a sparse dataset such as SPLADE or BM25 embeddings instead.";
    eprintln!("{}", notice);
}
/// Builds an RpQuant index (`projected_dim = min(64, dim)`,
/// `rerank_factor = 10`, seed 42) over `train` and evaluates it once with
/// k = 10.
///
/// Output is a single table row, or one JSON line when `cfg.json` is set.
/// Panics if the index returns an error at any step.
#[cfg(feature = "rp_quant")]
fn run_rp_quant(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::rp_quant::{RpQuantIndex, RpQuantParams};

    let verbose = !cfg.json;
    let projected_dim = 64.min(dim);
    if verbose {
        println!(
            "--- RpQuant (projected_dim={}, rerank=10) ---",
            projected_dim
        );
    }
    let params = RpQuantParams {
        projected_dim,
        rerank_factor: 10,
        seed: 42,
    };

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = RpQuantIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    let search = |q: &[f32], k: usize| index.search(q, k).unwrap();
    let result = evaluate(&search, test, neighbors, 10);
    if verbose {
        print_row("--", &result);
        println!();
    } else {
        let params_json = format!(
            "{{\"projected_dim\":{},\"rerank_factor\":10}}",
            projected_dim
        );
        let line = json_line("rp_quant", &params_json, build_time_s, rss, &result);
        emit_result(&cfg.results_path, &line);
    }
}
/// Builds an SQ4 index (`rerank_factor = 10`) over `train` and evaluates it
/// once with k = 10.
///
/// Output is a single table row, or one JSON line when `cfg.json` is set.
/// Panics if the index returns an error at any step.
#[cfg(feature = "sq4")]
fn run_sq4(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::sq4::{SQ4Index, SQ4Params};

    let verbose = !cfg.json;
    if verbose {
        println!("--- SQ4 (4-bit scalar quantization, rerank=10) ---");
    }
    let params = SQ4Params { rerank_factor: 10 };

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = SQ4Index::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    let search = |q: &[f32], k: usize| index.search(q, k).unwrap();
    let result = evaluate(&search, test, neighbors, 10);
    if verbose {
        print_row("--", &result);
        println!();
    } else {
        let params_json = "{\"rerank_factor\":10}";
        let line = json_line("sq4", params_json, build_time_s, rss, &result);
        emit_result(&cfg.results_path, &line);
    }
}
/// Builds an HNSW index over `train`, derives an ADSampling state from it,
/// and evaluates ADSampling-assisted search once per `cfg.ef_search_values`
/// entry with k = 10.
///
/// The HNSW index uses `cfg.m` / `cfg.ef_construction`, seed 42, and L2 or
/// cosine depending on `cfg.is_euclidean`. The reported build time covers
/// parameter setup, batch insertion, the HNSW build, and creation of the
/// ADSampling state. Output is a text table, or JSON lines when `cfg.json`
/// is set.
///
/// A failed `build()` now panics like every other step (and like the other
/// runners) instead of being discarded, so a broken index is never
/// benchmarked silently.
///
/// NOTE(review): the labels report `eps0=2.1` / `"epsilon0":2.1` while the
/// state is created from `ADSamplingParams::default()`; presumably that is
/// the default value, verify.
#[cfg(feature = "hnsw")]
fn run_adsampling(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::adsampling::{ADSamplingParams, ADSamplingState};
    use vicinity::hnsw::{HNSWIndex, HNSWParams};

    let m = cfg.m;
    let ef_construction = cfg.ef_construction;
    if !cfg.json {
        println!(
            "--- ADSampling (HNSW m={}, ef_c={}, eps0=2.1) ---",
            m, ef_construction
        );
    }

    let build_start = Instant::now();
    let metric = if cfg.is_euclidean {
        vicinity::DistanceMetric::L2
    } else {
        vicinity::DistanceMetric::Cosine
    };
    let params = HNSWParams {
        m,
        m_max: m,
        ef_construction,
        metric,
        auto_normalize: !cfg.is_euclidean,
        seed: Some(42),
        ..Default::default()
    };
    let mut index = HNSWIndex::with_params(dim, params).unwrap();
    // Ids are the positions in `train`; vectors are passed as one flat buffer.
    let ids: Vec<u32> = (0..train.len() as u32).collect();
    let flat: Vec<f32> = train.iter().flatten().copied().collect();
    index.add_batch(&ids, &flat).unwrap();
    index.build().unwrap();
    let ads_params = ADSamplingParams::default();
    let state = ADSamplingState::from_hnsw(&index, ads_params);
    let build_time_s = build_start.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if !cfg.json {
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s,
            train.len() as f64 / build_time_s
        );
        print_header();
    }

    for &ef in &cfg.ef_search_values {
        let result = evaluate(
            &|q, k| state.search_hnsw(&index, q, k, ef).unwrap(),
            test,
            neighbors,
            10,
        );
        if cfg.json {
            let params_json = format!(
                "{{\"m\":{},\"ef_construction\":{},\"ef_search\":{},\"epsilon0\":2.1}}",
                m, ef_construction, ef
            );
            emit_result(
                &cfg.results_path,
                &json_line("adsampling", &params_json, build_time_s, rss, &result),
            );
        } else {
            print_row(&format!("ef={}", ef), &result);
        }
    }

    if !cfg.json {
        println!();
    }
}
/// Builds a SymphonyQG index (constructor arguments `dim, 16, 16`) over
/// `train` and evaluates reranked search once per `cfg.ef_search_values`
/// entry with k = 10.
///
/// The rerank pool for each run is `max(2 * ef, 100)`. Output is a text
/// table, or JSON lines when `cfg.json` is set. Panics if the index returns
/// an error at any step.
#[cfg(all(feature = "hnsw", feature = "ivf_rabitq"))]
fn run_symphony_qg(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::hnsw::SymphonyQGIndex;

    let verbose = !cfg.json;
    if verbose {
        println!("--- SymphonyQG (m=16, 4-bit RaBitQ) ---");
    }

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = SymphonyQGIndex::new(dim, 16, 16).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let rerank_pool = (ef * 2).max(100);
        let search =
            |q: &[f32], k: usize| index.search_reranked(q, k, ef, rerank_pool).unwrap();
        let result = evaluate(&search, test, neighbors, 10);
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!(
                "{{\"m\":16,\"ef_search\":{},\"rerank_pool\":{}}}",
                ef, rerank_pool
            );
            let line = json_line("symphony_qg", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Builds a Curator index with fixed parameters (`branching_factor = 16`,
/// `max_leaf_size = 128`) over `train`, adding every vector with an empty
/// third argument, and evaluates it once with k = 10.
///
/// Output is a single table row, or one JSON line when `cfg.json` is set.
/// Panics if the index returns an error at any step.
#[cfg(feature = "curator")]
fn run_curator(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::curator::{CuratorIndex, CuratorParams};

    let verbose = !cfg.json;
    let params = CuratorParams {
        branching_factor: 16,
        max_leaf_size: 128,
        ef_search: 256,
        beam_width: 4,
    };
    if verbose {
        println!("--- Curator (branching=16, leaf=128) ---");
    }

    // Timed section: index creation, insertion of every vector (each one
    // cloned, as `add` takes ownership), and build.
    let timer = Instant::now();
    let mut index = CuratorIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add(id as u32, vector.clone(), vec![]).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    let search = |q: &[f32], k: usize| index.search(q, k).unwrap();
    let result = evaluate(&search, test, neighbors, 10);
    if verbose {
        print_row("--", &result);
        println!();
    } else {
        let params_json = r#"{"branching_factor":16,"max_leaf_size":128}"#;
        let line = json_line("curator", params_json, build_time_s, rss, &result);
        emit_result(&cfg.results_path, &line);
    }
}
/// Builds an ESG index with fixed parameters (`hnsw_m = 16`,
/// `hnsw_ef_construction = 200`, `ef_search = 100`) over `train`, adding
/// every vector with `0.0` as its third argument, and evaluates it once
/// with k = 10.
///
/// Output is a single table row, or one JSON line when `cfg.json` is set.
/// Panics if the index returns an error at any step.
#[cfg(all(feature = "esg", feature = "hnsw"))]
fn run_esg(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::esg::{EsgIndex, EsgParams};

    let verbose = !cfg.json;
    let params = EsgParams {
        hnsw_m: 16,
        hnsw_ef_construction: 200,
        ef_search: 100,
        ..Default::default()
    };
    if verbose {
        println!("--- ESG (m=16, ef_search=100) ---");
    }

    // Timed section: index creation, insertion of every vector (each one
    // cloned, as `add` takes ownership), and build.
    let timer = Instant::now();
    let mut index = EsgIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add(id as u32, vector.clone(), 0.0).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    let search = |q: &[f32], k: usize| index.search(q, k).unwrap();
    let result = evaluate(&search, test, neighbors, 10);
    if verbose {
        print_row("--", &result);
        println!();
    } else {
        let params_json = r#"{"hnsw_m":16,"ef_search":100}"#;
        let line = json_line("esg", params_json, build_time_s, rss, &result);
        emit_result(&cfg.results_path, &line);
    }
}
/// Builds a BinaryFlat index (`rerank_factor = 10`, seed 42) over `train`
/// and evaluates it once with k = 10.
///
/// Output is a single table row, or one JSON line when `cfg.json` is set.
/// Panics if the index returns an error at any step.
#[cfg(feature = "binary_index")]
fn run_binary_index(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::binary_index::{BinaryFlatIndex, BinaryFlatParams};

    let verbose = !cfg.json;
    let params = BinaryFlatParams {
        rerank_factor: 10,
        seed: 42,
        ..Default::default()
    };
    if verbose {
        println!("--- BinaryFlat (rerank=10) ---");
    }

    // Timed section: index creation, insertion of every vector, and build.
    let timer = Instant::now();
    let mut index = BinaryFlatIndex::new(dim, params).unwrap();
    for (id, vector) in train.iter().enumerate() {
        index.add_slice(id as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s, vectors_per_sec
        );
        print_header();
    }

    let search = |q: &[f32], k: usize| index.search(q, k).unwrap();
    let result = evaluate(&search, test, neighbors, 10);
    if verbose {
        print_row("--", &result);
        println!();
    } else {
        let params_json = r#"{"rerank_factor":10}"#;
        let line = json_line("binary_index", params_json, build_time_s, rss, &result);
        emit_result(&cfg.results_path, &line);
    }
}
/// Benchmarks the cross-polytope LSH index for 8, 16 and 32 hash tables,
/// each with probe counts 2, 4, 8 and 16 (probe counts above `dim` are
/// skipped), with k = 10.
///
/// For every table count one index is built with `num_probes = 1`; only that
/// build is timed and its time and RSS are reported for all probe counts of
/// the table count. Each probe count then gets its own freshly built index,
/// outside the timed section, which is the one actually searched. A search
/// error is treated as an empty result rather than a panic. Output is a text
/// table per table count, or JSON lines when `cfg.json` is set.
#[cfg(feature = "lsh")]
fn run_lsh(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::lsh::{CrossPolytopeLSHIndex, LSHParams};

    let verbose = !cfg.json;
    let table_counts = [8, 16, 32];
    let probe_counts = [2, 4, 8, 16];

    for &num_tables in &table_counts {
        let build_params = LSHParams {
            num_tables,
            num_probes: 1,
            seed: Some(42),
        };
        if verbose {
            println!("--- LSH (tables={}) ---", num_tables);
        }

        // Timed section: index creation, flattening of the training set,
        // insertion, and build.
        let timer = Instant::now();
        let mut index = CrossPolytopeLSHIndex::new(dim, build_params).unwrap();
        let flat: Vec<f32> = train.iter().flat_map(|v| v.iter().copied()).collect();
        index.add_vectors(&flat).unwrap();
        index.build().unwrap();
        let build_time_s = timer.elapsed().as_secs_f64();
        let rss = current_rss_kb();

        if verbose {
            let vectors_per_sec = train.len() as f64 / build_time_s;
            println!(
                "Build: {:.2}s ({:.0} vectors/sec)\n",
                build_time_s, vectors_per_sec
            );
            print_header();
        }

        for &num_probes in &probe_counts {
            if num_probes > dim {
                continue;
            }
            // The probe count is fixed at construction, so a separate index
            // is built for each setting.
            let search_params = LSHParams {
                num_tables,
                num_probes,
                seed: Some(42),
            };
            let mut search_index = CrossPolytopeLSHIndex::new(dim, search_params).unwrap();
            search_index.add_vectors(&flat).unwrap();
            search_index.build().unwrap();

            let result = evaluate(
                &|q, k| search_index.search(q, k).unwrap_or_default(),
                test,
                neighbors,
                10,
            );
            if verbose {
                let label = format!("probes={}", num_probes);
                print_row(&label, &result);
            } else {
                let params_json = format!(
                    "{{\"num_tables\":{},\"num_probes\":{}}}",
                    num_tables, num_probes
                );
                let line = json_line("lsh", &params_json, build_time_s, rss, &result);
                emit_result(&cfg.results_path, &line);
            }
        }

        if verbose {
            println!();
        }
    }
}
/// Builds an HNSW index (`m = cfg.m`, `m_max = 2 * cfg.m`) over `train`,
/// prepares a probabilistic routing test over its stored vectors, and
/// evaluates PRT-assisted search once per `cfg.ef_search_values` entry with
/// k = 10.
///
/// The number of projections is `dim / 4` clamped to `8..=64`. The reported
/// build time covers index construction, batch insertion, the HNSW build,
/// and the database projection. A search error is treated as an empty result
/// rather than a panic; all other index errors panic. Output is a text
/// table, or JSON lines when `cfg.json` is set.
#[cfg(feature = "hnsw")]
fn run_hnsw_prt(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::hnsw::{HNSWIndex, HNSWParams};
    use vicinity::prt::ProbabilisticRoutingTest;
    use vicinity::DistanceMetric;

    let verbose = !cfg.json;
    let m = cfg.m;
    let ef_construction = cfg.ef_construction;
    if verbose {
        println!("--- HNSW+PRT (M={}, ef_c={}) ---", m, ef_construction);
    }
    let metric = match cfg.is_euclidean {
        true => DistanceMetric::L2,
        false => DistanceMetric::Cosine,
    };

    // Timed section starts here.
    let timer = Instant::now();
    let params = HNSWParams {
        m,
        m_max: m * 2,
        ef_construction,
        metric,
        auto_normalize: !cfg.is_euclidean,
        ..Default::default()
    };
    let mut index = HNSWIndex::with_params(dim, params).unwrap();
    // Ids are the positions in `train`; vectors are passed as one flat buffer.
    let ids: Vec<u32> = (0..train.len() as u32).collect();
    let flat: Vec<f32> = train.iter().flatten().copied().collect();
    index.add_batch(&ids, &flat).unwrap();
    index.build().unwrap();

    let num_proj = (dim / 4).clamp(8, 64);
    let mut prt = ProbabilisticRoutingTest::new(dim, num_proj, Some(42));
    prt.project_database(index.raw_vectors());
    let build_time_s = timer.elapsed().as_secs_f64();
    let rss = current_rss_kb();

    if verbose {
        let vectors_per_sec = train.len() as f64 / build_time_s;
        println!(
            "Build: {:.2}s ({:.0} vectors/sec), PRT projections: {}\n",
            build_time_s, vectors_per_sec, num_proj
        );
        print_header();
    }

    for ef in cfg.ef_search_values.iter().copied() {
        let result = evaluate(
            &|q, k| {
                index
                    .search_prt(q, k, ef, &prt, 1.5, 0.95)
                    .map(|(results, _)| results)
                    .unwrap_or_default()
            },
            test,
            neighbors,
            10,
        );
        if verbose {
            let label = format!("ef={}", ef);
            print_row(&label, &result);
        } else {
            let params_json = format!(
                "{{\"m\":{},\"ef_construction\":{},\"ef_search\":{},\"num_projections\":{}}}",
                m, ef_construction, ef, num_proj
            );
            let line = json_line("hnsw_prt", &params_json, build_time_s, rss, &result);
            emit_result(&cfg.results_path, &line);
        }
    }

    if verbose {
        println!();
    }
}
/// Benchmarks `HNSWSq8Index` using reranked search.
///
/// Builds one index from `cfg.m` / `cfg.ef_construction` with a fixed seed of
/// 42, using L2 when `cfg.is_euclidean` is set and cosine with `auto_normalize`
/// otherwise. Each value in `cfg.ef_search_values` is then scored with
/// `evaluate(.., 10)`, calling `search_reranked` with a rerank pool of
/// `max(2 * ef, 100)`.
///
/// When `cfg.json` is set, one JSON line per `ef` goes through `emit_result`;
/// otherwise a human-readable table is printed.
///
/// # Panics
///
/// Panics if `train` holds more vectors than `u32` ids can address, or if index
/// construction, insertion, build, or any search call fails.
#[cfg(all(feature = "hnsw", feature = "sq8"))]
fn run_sq8u(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use vicinity::hnsw::{HNSWParams, HNSWSq8Index};
    use vicinity::DistanceMetric;
    let m = cfg.m;
    let ef_construction = cfg.ef_construction;
    if !cfg.json {
        println!("--- SQ8U (M={}, ef_c={}) ---", m, ef_construction);
    }
    let metric = if cfg.is_euclidean {
        DistanceMetric::L2
    } else {
        DistanceMetric::Cosine
    };
    // Ids are u32; fail loudly instead of letting the cast below wrap around.
    assert!(
        train.len() <= u32::MAX as usize,
        "training set has more vectors than u32 ids can address"
    );
    // The timed section covers id generation and flattening as well as
    // insertion and the index build.
    let build_start = Instant::now();
    let params = HNSWParams {
        m,
        m_max: m * 2,
        ef_construction,
        metric,
        auto_normalize: !cfg.is_euclidean,
        seed: Some(42),
        ..Default::default()
    };
    let mut index = HNSWSq8Index::with_params(dim, params).unwrap();
    let ids: Vec<u32> = (0..train.len() as u32).collect();
    let flat: Vec<f32> = train.iter().flatten().copied().collect();
    index.add_batch(&ids, &flat).unwrap();
    index.build().unwrap();
    let build_time_s = build_start.elapsed().as_secs_f64();
    let rss = current_rss_kb();
    if !cfg.json {
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s,
            train.len() as f64 / build_time_s
        );
        print_header();
    }
    for &ef in &cfg.ef_search_values {
        // Rerank at least 100 candidates, and twice `ef` once that is larger.
        let rerank_pool = (ef * 2).max(100);
        let result = evaluate(
            &|q, k| index.search_reranked(q, k, ef, rerank_pool).unwrap(),
            test,
            neighbors,
            10,
        );
        if cfg.json {
            let params_json = format!(
                "{{\"m\":{},\"ef_construction\":{},\"ef_search\":{},\"rerank_pool\":{}}}",
                m, ef_construction, ef, rerank_pool
            );
            emit_result(
                &cfg.results_path,
                &json_line("sq8u", &params_json, build_time_s, rss, &result),
            );
        } else {
            print_row(&format!("ef={}", ef), &result);
        }
    }
    if !cfg.json {
        println!();
    }
}
/// Benchmarks `SymphonyQGVRIndex` using reranked search.
///
/// Builds one index from `cfg.m` / `cfg.ef_construction` with
/// `RaBitQConfig::bits4()` and a fixed seed of 42, using L2 when
/// `cfg.is_euclidean` is set and cosine with `auto_normalize` otherwise.
/// Vectors are inserted one at a time, with the position in `train` as the id.
/// Each value in `cfg.ef_search_values` is then scored with
/// `evaluate(.., 10)`, calling `search_reranked` with a rerank pool of
/// `max(2 * ef, 100)`.
///
/// When `cfg.json` is set, one JSON line per `ef` goes through `emit_result`;
/// otherwise a human-readable table is printed.
///
/// # Panics
///
/// Panics if `train` holds more vectors than `u32` ids can address, or if index
/// construction, insertion, build, or any search call fails.
#[cfg(all(feature = "hnsw", feature = "ivf_rabitq"))]
fn run_symphony_qg_vr(
    cfg: &Config,
    train: &[Vec<f32>],
    test: &[Vec<f32>],
    neighbors: &[Vec<i32>],
    dim: usize,
) {
    use qntz::rabitq::RaBitQConfig;
    use vicinity::hnsw::{HNSWParams, SymphonyQGVRIndex};
    use vicinity::DistanceMetric;
    let m = cfg.m;
    let ef_construction = cfg.ef_construction;
    if !cfg.json {
        println!(
            "--- SymphonyQG-VR (M={}, ef_c={}, L2-capable) ---",
            m, ef_construction
        );
    }
    let metric = if cfg.is_euclidean {
        DistanceMetric::L2
    } else {
        DistanceMetric::Cosine
    };
    // Ids are u32; checking once here keeps the per-vector cast in the
    // insertion loop from wrapping around.
    assert!(
        train.len() <= u32::MAX as usize,
        "training set has more vectors than u32 ids can address"
    );
    let build_start = Instant::now();
    let params = HNSWParams {
        m,
        m_max: m * 2,
        ef_construction,
        metric,
        auto_normalize: !cfg.is_euclidean,
        seed: Some(42),
        ..Default::default()
    };
    let mut index = SymphonyQGVRIndex::new(dim, params, RaBitQConfig::bits4(), 42).unwrap();
    for (i, vector) in train.iter().enumerate() {
        index.add_slice(i as u32, vector).unwrap();
    }
    index.build().unwrap();
    let build_time_s = build_start.elapsed().as_secs_f64();
    let rss = current_rss_kb();
    if !cfg.json {
        println!(
            "Build: {:.2}s ({:.0} vectors/sec)\n",
            build_time_s,
            train.len() as f64 / build_time_s
        );
        print_header();
    }
    for &ef in &cfg.ef_search_values {
        // Rerank at least 100 candidates, and twice `ef` once that is larger.
        let rerank_pool = (ef * 2).max(100);
        let result = evaluate(
            &|q, k| index.search_reranked(q, k, ef, rerank_pool).unwrap(),
            test,
            neighbors,
            10,
        );
        if cfg.json {
            let params_json = format!(
                "{{\"m\":{},\"ef_construction\":{},\"ef_search\":{},\"rerank_pool\":{}}}",
                m, ef_construction, ef, rerank_pool
            );
            emit_result(
                &cfg.results_path,
                &json_line("symphony_qg_vr", &params_json, build_time_s, rss, &result),
            );
        } else {
            print_row(&format!("ef={}", ef), &result);
        }
    }
    if !cfg.json {
        println!();
    }
}
fn run_brute(cfg: &Config, train: &[Vec<f32>], test: &[Vec<f32>], neighbors: &[Vec<i32>]) {
if !cfg.json {
println!("--- Brute Force (linear scan) ---");
}
let build_time_s = 0.0; let rss = current_rss_kb();
if !cfg.json {
println!("Build: N/A (no index)\n");
print_header();
}
let metric = if cfg.is_euclidean {
vicinity::DistanceMetric::L2
} else {
vicinity::DistanceMetric::Cosine
};
let result = evaluate(
&|q, k| brute_force_search(train, q, k, metric),
test,
neighbors,
10,
);
if cfg.json {
let params_json = "{}";
emit_result(
&cfg.results_path,
&json_line("brute", params_json, build_time_s, rss, &result),
);
} else {
print_row("--", &result);
println!();
}
}
/// Entry point: parses the command line, loads the dataset, and runs every
/// requested algorithm that is not already recorded in the results file.
///
/// Steps, in order:
/// 1. Exit with status 1 if `<data_dir>/train.bin` does not exist.
/// 2. In JSON mode, write a `_meta` line when the results file does not exist yet.
/// 3. Load `train.bin`, `test.bin`, and `neighbors.bin` from `cfg.data_dir`.
/// 4. Skip algorithms that `load_completed_algos` finds in the results file.
/// 5. Dispatch each remaining algorithm name to its `run_*` function. Names
///    whose feature is not compiled in, or which cannot run on a euclidean
///    dataset, produce a message on stderr instead.
///
/// # Errors
///
/// Returns the error from `common::load_vectors` / `common::load_neighbors` if
/// any of the three dataset files cannot be loaded.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// Escapes `s` so it can be embedded inside a JSON string literal.
    fn json_escape(s: &str) -> String {
        let mut out = String::with_capacity(s.len());
        for c in s.chars() {
            match c {
                '"' => out.push_str("\\\""),
                '\\' => out.push_str("\\\\"),
                c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
                c => out.push(c),
            }
        }
        out
    }

    let cfg = parse_args();
    if !Path::new(&cfg.data_dir).join("train.bin").exists() {
        eprintln!("Dataset not found at: {}/train.bin", cfg.data_dir);
        eprintln!("Run: uv run scripts/download_ann_benchmarks.py <dataset>");
        std::process::exit(1);
    }
    if !cfg.json {
        println!("ANN Benchmark");
        println!("=============");
        println!("Data: {}\n", cfg.data_dir);
    }
    if cfg.json && !cfg.results_path.exists() {
        // `data_dir` is user-supplied and may contain backslashes or quotes
        // (e.g. Windows paths), so it is escaped to keep the line valid JSON.
        // NOTE: CARGO_PKG_RUST_VERSION is the `rust-version` field of the
        // manifest (empty when unset), not the compiler that built this binary.
        let meta = format!(
            "{{\"_meta\":{{\"dataset\":\"{}\",\"metric\":\"{}\",\"rustc\":\"{}\",\"vicinity\":\"{}\"}}}}",
            json_escape(&cfg.data_dir),
            if cfg.is_euclidean { "l2" } else { "cosine" },
            env!("CARGO_PKG_RUST_VERSION"),
            env!("CARGO_PKG_VERSION"),
        );
        emit_result(&cfg.results_path, &meta);
    }
    let (train, dim) = common::load_vectors(&format!("{}/train.bin", cfg.data_dir))?;
    let (test, _) = common::load_vectors(&format!("{}/test.bin", cfg.data_dir))?;
    let (neighbors, k_gt) = common::load_neighbors(&format!("{}/neighbors.bin", cfg.data_dir))?;
    if !cfg.json {
        println!("Train: {} vectors x {} dims", train.len(), dim);
        println!("Test: {} queries", test.len());
        println!("Ground truth: {} neighbors per query\n", k_gt);
    }
    let completed = load_completed_algos(&cfg.results_path);
    if !completed.is_empty() {
        // Sorted so the message does not depend on HashSet iteration order.
        let mut done: Vec<&str> = completed.iter().map(String::as_str).collect();
        done.sort_unstable();
        eprintln!(
            "Resuming: skipping {} completed algorithm(s): {}",
            done.len(),
            done.join(", ")
        );
    }
    eprintln!("Results file: {}\n", cfg.results_path.display());
    for algo in &cfg.algos {
        if completed.contains(algo.as_str()) {
            continue;
        }
        // Each name has one arm per cfg state, so a name whose feature is
        // missing reports that instead of falling through to "Unknown algorithm".
        match algo.as_str() {
            #[cfg(feature = "hnsw")]
            "hnsw" => run_hnsw(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "hnsw"))]
            "hnsw" => {
                eprintln!("HNSW not available (compile with --features hnsw)");
            }
            #[cfg(feature = "nsw")]
            "nsw" if !cfg.is_euclidean => run_nsw(&cfg, &train, &test, &neighbors, dim),
            #[cfg(feature = "nsw")]
            "nsw" => {
                eprintln!("nsw: skipping (cosine-only, dataset is euclidean)");
            }
            #[cfg(not(feature = "nsw"))]
            "nsw" => {
                eprintln!("NSW not available (compile with --features nsw)");
            }
            #[cfg(feature = "ivf_pq")]
            "ivfpq" => run_ivfpq(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "ivf_pq"))]
            "ivfpq" => {
                eprintln!("IVF-PQ not available (compile with --features ivf_pq)");
            }
            #[cfg(feature = "emg")]
            "emg" if !cfg.is_euclidean => run_emg(&cfg, &train, &test, &neighbors, dim),
            #[cfg(feature = "emg")]
            "emg" => eprintln!("emg: skipping (cosine-only, dataset is euclidean)"),
            #[cfg(not(feature = "emg"))]
            "emg" => eprintln!("EMG not available (compile with --features emg)"),
            #[cfg(feature = "nsg")]
            "nsg" if !cfg.is_euclidean => run_nsg(&cfg, &train, &test, &neighbors, dim),
            #[cfg(feature = "nsg")]
            "nsg" => eprintln!("nsg: skipping (cosine-only, dataset is euclidean)"),
            #[cfg(not(feature = "nsg"))]
            "nsg" => eprintln!("NSG not available (compile with --features nsg)"),
            #[cfg(feature = "pipnn")]
            "pipnn" if !cfg.is_euclidean => run_pipnn(&cfg, &train, &test, &neighbors, dim),
            #[cfg(feature = "pipnn")]
            "pipnn" => eprintln!("pipnn: skipping (cosine-only, dataset is euclidean)"),
            #[cfg(not(feature = "pipnn"))]
            "pipnn" => eprintln!("PiPNN not available (compile with --features pipnn)"),
            #[cfg(feature = "sng")]
            "sng" if !cfg.is_euclidean => run_sng(&cfg, &train, &test, &neighbors, dim),
            #[cfg(feature = "sng")]
            "sng" => eprintln!("sng: skipping (cosine-only, dataset is euclidean)"),
            #[cfg(not(feature = "sng"))]
            "sng" => eprintln!("SNG not available (compile with --features sng)"),
            #[cfg(feature = "vamana")]
            "vamana" => run_vamana(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "vamana"))]
            "vamana" => {
                eprintln!("Vamana not available (compile with --features vamana)");
            }
            #[cfg(feature = "ivf_rabitq")]
            "ivf_rabitq" => run_ivf_rabitq(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "ivf_rabitq"))]
            "ivf_rabitq" => {
                eprintln!("IVF-RaBitQ not available (compile with --features ivf_rabitq)");
            }
            #[cfg(feature = "finger")]
            "finger" if !cfg.is_euclidean => run_finger(&cfg, &train, &test, &neighbors, dim),
            #[cfg(feature = "finger")]
            "finger" => eprintln!("finger: skipping (cosine-only, dataset is euclidean)"),
            #[cfg(not(feature = "finger"))]
            "finger" => eprintln!("FINGER not available (compile with --features finger)"),
            #[cfg(feature = "fresh_graph")]
            "fresh_graph" => run_fresh_graph(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "fresh_graph"))]
            "fresh_graph" => {
                eprintln!("FreshGraph not available (compile with --features fresh_graph)");
            }
            #[cfg(feature = "filtered_graph")]
            "filtered_graph" => run_filtered_graph(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "filtered_graph"))]
            "filtered_graph" => {
                eprintln!("FilteredGraph not available (compile with --features filtered_graph)");
            }
            #[cfg(feature = "rp_quant")]
            "rp_quant" => run_rp_quant(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "rp_quant"))]
            "rp_quant" => {
                eprintln!("RpQuant not available (compile with --features rp_quant)");
            }
            #[cfg(feature = "sparse_mips")]
            "sparse_mips" => run_sparse_mips(&cfg, &train, &test, &neighbors),
            #[cfg(not(feature = "sparse_mips"))]
            "sparse_mips" => {
                eprintln!("sparse_mips not available (compile with --features sparse_mips)");
            }
            #[cfg(all(feature = "hnsw", feature = "ivf_rabitq"))]
            "symphony_qg" if !cfg.is_euclidean => {
                run_symphony_qg(&cfg, &train, &test, &neighbors, dim);
            }
            #[cfg(all(feature = "hnsw", feature = "ivf_rabitq"))]
            "symphony_qg" => {
                eprintln!("symphony_qg: skipping (cosine-only, dataset is euclidean; use symphony_qg_vr for L2)");
            }
            #[cfg(not(all(feature = "hnsw", feature = "ivf_rabitq")))]
            "symphony_qg" => {
                eprintln!("SymphonyQG not available (compile with --features hnsw,ivf_rabitq)");
            }
            #[cfg(feature = "curator")]
            "curator" => run_curator(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "curator"))]
            "curator" => {
                eprintln!("Curator not available (compile with --features curator)");
            }
            #[cfg(all(feature = "esg", feature = "hnsw"))]
            "esg" => run_esg(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(all(feature = "esg", feature = "hnsw")))]
            "esg" => {
                eprintln!("ESG not available (compile with --features esg,hnsw)");
            }
            #[cfg(feature = "binary_index")]
            "binary_index" => run_binary_index(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "binary_index"))]
            "binary_index" => {
                eprintln!("BinaryIndex not available (compile with --features binary_index)");
            }
            #[cfg(feature = "sq4")]
            "sq4" => run_sq4(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "sq4"))]
            "sq4" => {
                eprintln!("SQ4 not available (compile with --features sq4)");
            }
            #[cfg(all(feature = "hnsw", feature = "sq8"))]
            "sq8u" => run_sq8u(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(all(feature = "hnsw", feature = "sq8")))]
            "sq8u" => {
                eprintln!("SQ8U not available (compile with --features hnsw,sq8)");
            }
            #[cfg(all(feature = "hnsw", feature = "ivf_rabitq"))]
            "symphony_qg_vr" => run_symphony_qg_vr(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(all(feature = "hnsw", feature = "ivf_rabitq")))]
            "symphony_qg_vr" => {
                eprintln!("SymphonyQG-VR not available (compile with --features hnsw,ivf_rabitq)");
            }
            #[cfg(feature = "hnsw")]
            "adsampling" => run_adsampling(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "hnsw"))]
            "adsampling" => {
                eprintln!("ADSampling not available (compile with --features hnsw)");
            }
            #[cfg(feature = "lsh")]
            "lsh" => run_lsh(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "lsh"))]
            "lsh" => {
                eprintln!("LSH not available (compile with --features lsh)");
            }
            #[cfg(feature = "hnsw")]
            "hnsw_prt" => run_hnsw_prt(&cfg, &train, &test, &neighbors, dim),
            #[cfg(not(feature = "hnsw"))]
            "hnsw_prt" => {
                eprintln!("HNSW+PRT not available (compile with --features hnsw)");
            }
            "brute" => run_brute(&cfg, &train, &test, &neighbors),
            other => {
                eprintln!(
                    "Unknown algorithm: {}. Options: hnsw, nsw, ivfpq, emg, nsg, pipnn, sng, vamana, ivf_rabitq, symphony_qg, symphony_qg_vr, finger, fresh_graph, filtered_graph, rp_quant, sparse_mips, curator, esg, binary_index, sq4, sq8u, adsampling, lsh, hnsw_prt, brute",
                    other
                );
            }
        }
    }
    Ok(())
}