1use diskann_rs::{DiskANN, DiskAnnError, DistanceMetric};
2use rand::prelude::*;
3use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
4use std::sync::Arc;
5use std::time::Instant;
6
7fn main() -> Result<(), DiskAnnError> {
8 const NUM_VECTORS: usize = 1_000_000;
9 const DIM: usize = 1536;
10 const MAX_DEGREE: usize = 32;
11 const BUILD_BEAM_WIDTH: usize = 128;
12 const ALPHA: f32 = 1.2;
13 let distance_metric = DistanceMetric::Cosine;
14
15 let singlefile_path = "diskann_large.db";
16
17 if !std::path::Path::new(singlefile_path).exists() {
19 println!(
20 "Building DiskANN index with {} vectors, dim={}, distance={:?}",
21 NUM_VECTORS, DIM, distance_metric
22 );
23
24 println!("Generating vectors...");
26 let mut rng = thread_rng();
27 let mut vectors = Vec::new();
28 for i in 0..NUM_VECTORS {
29 if i % 100_000 == 0 {
30 println!(" Generated {} vectors...", i);
31 }
32 let v: Vec<f32> = (0..DIM).map(|_| rng.gen()).collect();
33 vectors.push(v);
34 }
35
36 println!("Starting index build...");
37 let start = Instant::now();
38 let _index = DiskANN::build_index(
39 &vectors,
40 MAX_DEGREE,
41 BUILD_BEAM_WIDTH,
42 ALPHA,
43 distance_metric,
44 singlefile_path,
45 )?;
46 let elapsed = start.elapsed().as_secs_f32();
47 println!("Done building index in {:.2} s", elapsed);
48 } else {
49 println!(
50 "Index file {} already exists, skipping build.",
51 singlefile_path
52 );
53 }
54
55 let open_start = Instant::now();
57 let index = Arc::new(DiskANN::open_index(singlefile_path)?);
58 let open_time = open_start.elapsed().as_secs_f32();
59 println!(
60 "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
61 index.num_vectors, index.dim, index.distance_metric, open_time
62 );
63
64 let num_queries = 100;
66 let k = 10;
67 let beam_width = 64;
68
69 println!("\nGenerating {} query vectors...", num_queries);
71 let mut rng = thread_rng();
72 let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(num_queries);
73 for _ in 0..num_queries {
74 let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
75 query_batch.push(q);
76 }
77
78 println!("\nRunning sequential queries to measure performance...");
80 let mut times = Vec::new();
81 for (i, query) in query_batch.iter().take(10).enumerate() {
82 let start = Instant::now();
83 let neighbors = index.search(query, k, beam_width);
84 let elapsed = start.elapsed();
85 times.push(elapsed.as_micros());
86 println!("Query {}: found {} neighbors in {:?}", i, neighbors.len(), elapsed);
87 }
88
89 let avg_time = times.iter().sum::<u128>() as f64 / times.len() as f64;
90 println!("Average query time: {:.2} µs", avg_time);
91
92 println!("\nRunning {} queries in parallel...", num_queries);
94 let search_start = Instant::now();
95 let results: Vec<Vec<u32>> = query_batch
96 .par_iter()
97 .map(|query| index.search(query, k, beam_width))
98 .collect();
99 let search_time = search_start.elapsed().as_secs_f32();
100
101 println!("Performed {} queries in {:.2} s", num_queries, search_time);
102 println!("Throughput: {:.2} queries/sec", num_queries as f32 / search_time);
103
104 let all_valid = results.iter().all(|r| r.len() == k.min(index.num_vectors));
106 println!("All queries returned valid results: {}", all_valid);
107
108 println!("\nMemory-mapped index ready. The process should have minimal memory footprint.");
110 println!("You can check memory usage with 'ps aux | grep perf_test' in another terminal.");
111 println!("Press Enter to exit...");
112
113 let mut input = String::new();
114 std::io::stdin().read_line(&mut input)?;
115
116 Ok(())
117}