pub struct DiskANN {
pub dim: usize,
pub num_vectors: usize,
pub max_degree: usize,
pub distance_metric: DistanceMetric,
/* private fields */
}Expand description
Main struct representing a DiskANN index
Fields§
§dim: usizeDimensionality of vectors in the index
num_vectors: usizeNumber of vectors in the index
max_degree: usizeMaximum number of edges per node
distance_metric: DistanceMetricDistance metric used by this index
Implementations§
Source§impl DiskANN
impl DiskANN
Sourcepub fn build_index(
vectors: &[Vec<f32>],
max_degree: usize,
build_beam_width: usize,
alpha: f32,
distance_metric: DistanceMetric,
file_path: &str,
) -> Result<Self, DiskAnnError>
pub fn build_index( vectors: &[Vec<f32>], max_degree: usize, build_beam_width: usize, alpha: f32, distance_metric: DistanceMetric, file_path: &str, ) -> Result<Self, DiskAnnError>
Builds a new index from provided vectors
§Arguments
vectors- The vectors to indexmax_degree- Maximum number of edges per node (typically 32-64)build_beam_width- Beam width during construction (typically 128)alpha- Pruning parameter (typically 1.2-2.0)distance_metric- Distance metric to usefile_path- Path where the index file will be created
§Returns
Returns Result<DiskANN, DiskAnnError>
Examples found in repository?
examples/demo.rs (lines 28-35)
6fn main() -> Result<(), DiskAnnError> {
7 let singlefile_path = "diskann.db";
8 let num_vectors = 100_000;
9 let dim = 128;
10 let max_degree = 32;
11 let build_beam_width = 128;
12 let alpha = 1.2;
13 let distance_metric = DistanceMetric::Cosine;
14
15 // Build if missing
16 if !std::path::Path::new(singlefile_path).exists() {
17 println!("Building DiskANN index at {singlefile_path}...");
18
19 // Generate sample vectors (in real usage, you'd load your own data)
20 println!("Generating {} sample vectors of dimension {}...", num_vectors, dim);
21 let mut rng = thread_rng();
22 let mut vectors = Vec::new();
23 for _ in 0..num_vectors {
24 let v: Vec<f32> = (0..dim).map(|_| rng.gen()).collect();
25 vectors.push(v);
26 }
27
28 let index = DiskANN::build_index(
29 &vectors,
30 max_degree,
31 build_beam_width,
32 alpha,
33 distance_metric,
34 singlefile_path,
35 )?;
36 println!("Build done. Index contains {} vectors", index.num_vectors);
37 } else {
38 println!("Index file {singlefile_path} already exists, skipping build.");
39 }
40
41 // Open the index
42 let index = Arc::new(DiskANN::open_index(singlefile_path)?);
43 println!(
44 "Opened index: {} vectors, dimension={}, max_degree={}",
45 index.num_vectors, index.dim, index.max_degree
46 );
47
48 // Perform a sample query
49 let mut rng = thread_rng();
50 let query: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
51 let k = 10;
52 let search_beam_width = 64;
53
54 println!("\nSearching for {} nearest neighbors with beam_width={}...", k, search_beam_width);
55 let start = std::time::Instant::now();
56 let neighbors = index.search(&query, k, search_beam_width);
57 let elapsed = start.elapsed();
58
59 println!("Search completed in {:?}", elapsed);
60 println!("Found {} neighbors:", neighbors.len());
61 for (i, &id) in neighbors.iter().enumerate() {
62 println!(" {}: node {}", i + 1, id);
63 }
64
65 Ok(())
66}More examples
examples/perf_test.rs (lines 38-45)
7fn main() -> Result<(), DiskAnnError> {
8 const NUM_VECTORS: usize = 1_000_000;
9 const DIM: usize = 1536;
10 const MAX_DEGREE: usize = 32;
11 const BUILD_BEAM_WIDTH: usize = 128;
12 const ALPHA: f32 = 1.2;
13 let distance_metric = DistanceMetric::Cosine;
14
15 let singlefile_path = "diskann_large.db";
16
17 // Build if missing
18 if !std::path::Path::new(singlefile_path).exists() {
19 println!(
20 "Building DiskANN index with {} vectors, dim={}, distance={:?}",
21 NUM_VECTORS, DIM, distance_metric
22 );
23
24 // Generate vectors
25 println!("Generating vectors...");
26 let mut rng = thread_rng();
27 let mut vectors = Vec::new();
28 for i in 0..NUM_VECTORS {
29 if i % 100_000 == 0 {
30 println!(" Generated {} vectors...", i);
31 }
32 let v: Vec<f32> = (0..DIM).map(|_| rng.gen()).collect();
33 vectors.push(v);
34 }
35
36 println!("Starting index build...");
37 let start = Instant::now();
38 let _index = DiskANN::build_index(
39 &vectors,
40 MAX_DEGREE,
41 BUILD_BEAM_WIDTH,
42 ALPHA,
43 distance_metric,
44 singlefile_path,
45 )?;
46 let elapsed = start.elapsed().as_secs_f32();
47 println!("Done building index in {:.2} s", elapsed);
48 } else {
49 println!(
50 "Index file {} already exists, skipping build.",
51 singlefile_path
52 );
53 }
54
55 // Open index
56 let open_start = Instant::now();
57 let index = Arc::new(DiskANN::open_index(singlefile_path)?);
58 let open_time = open_start.elapsed().as_secs_f32();
59 println!(
60 "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
61 index.num_vectors, index.dim, index.distance_metric, open_time
62 );
63
64 // Test memory efficiency with queries
65 let num_queries = 100;
66 let k = 10;
67 let beam_width = 64;
68
69 // Generate query batch
70 println!("\nGenerating {} query vectors...", num_queries);
71 let mut rng = thread_rng();
72 let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(num_queries);
73 for _ in 0..num_queries {
74 let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
75 query_batch.push(q);
76 }
77
78 // Sequential queries to measure individual performance
79 println!("\nRunning sequential queries to measure performance...");
80 let mut times = Vec::new();
81 for (i, query) in query_batch.iter().take(10).enumerate() {
82 let start = Instant::now();
83 let neighbors = index.search(query, k, beam_width);
84 let elapsed = start.elapsed();
85 times.push(elapsed.as_micros());
86 println!("Query {}: found {} neighbors in {:?}", i, neighbors.len(), elapsed);
87 }
88
89 let avg_time = times.iter().sum::<u128>() as f64 / times.len() as f64;
90 println!("Average query time: {:.2} µs", avg_time);
91
92 // Parallel queries to test throughput
93 println!("\nRunning {} queries in parallel...", num_queries);
94 let search_start = Instant::now();
95 let results: Vec<Vec<u32>> = query_batch
96 .par_iter()
97 .map(|query| index.search(query, k, beam_width))
98 .collect();
99 let search_time = search_start.elapsed().as_secs_f32();
100
101 println!("Performed {} queries in {:.2} s", num_queries, search_time);
102 println!("Throughput: {:.2} queries/sec", num_queries as f32 / search_time);
103
104 // Verify all queries returned results
105 let all_valid = results.iter().all(|r| r.len() == k.min(index.num_vectors));
106 println!("All queries returned valid results: {}", all_valid);
107
108 // Memory footprint check
109 println!("\nMemory-mapped index ready. The process should have minimal memory footprint.");
110 println!("You can check memory usage with 'ps aux | grep perf_test' in another terminal.");
111 println!("Press Enter to exit...");
112
113 let mut input = String::new();
114 std::io::stdin().read_line(&mut input)?;
115
116 Ok(())
117}Sourcepub fn open_index(path: &str) -> Result<Self, DiskAnnError>
pub fn open_index(path: &str) -> Result<Self, DiskAnnError>
Opens an existing index file
§Arguments
path- Path to the index file
§Returns
Returns Result<DiskANN, DiskAnnError>
Examples found in repository?
examples/demo.rs (line 42)
6fn main() -> Result<(), DiskAnnError> {
7 let singlefile_path = "diskann.db";
8 let num_vectors = 100_000;
9 let dim = 128;
10 let max_degree = 32;
11 let build_beam_width = 128;
12 let alpha = 1.2;
13 let distance_metric = DistanceMetric::Cosine;
14
15 // Build if missing
16 if !std::path::Path::new(singlefile_path).exists() {
17 println!("Building DiskANN index at {singlefile_path}...");
18
19 // Generate sample vectors (in real usage, you'd load your own data)
20 println!("Generating {} sample vectors of dimension {}...", num_vectors, dim);
21 let mut rng = thread_rng();
22 let mut vectors = Vec::new();
23 for _ in 0..num_vectors {
24 let v: Vec<f32> = (0..dim).map(|_| rng.gen()).collect();
25 vectors.push(v);
26 }
27
28 let index = DiskANN::build_index(
29 &vectors,
30 max_degree,
31 build_beam_width,
32 alpha,
33 distance_metric,
34 singlefile_path,
35 )?;
36 println!("Build done. Index contains {} vectors", index.num_vectors);
37 } else {
38 println!("Index file {singlefile_path} already exists, skipping build.");
39 }
40
41 // Open the index
42 let index = Arc::new(DiskANN::open_index(singlefile_path)?);
43 println!(
44 "Opened index: {} vectors, dimension={}, max_degree={}",
45 index.num_vectors, index.dim, index.max_degree
46 );
47
48 // Perform a sample query
49 let mut rng = thread_rng();
50 let query: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
51 let k = 10;
52 let search_beam_width = 64;
53
54 println!("\nSearching for {} nearest neighbors with beam_width={}...", k, search_beam_width);
55 let start = std::time::Instant::now();
56 let neighbors = index.search(&query, k, search_beam_width);
57 let elapsed = start.elapsed();
58
59 println!("Search completed in {:?}", elapsed);
60 println!("Found {} neighbors:", neighbors.len());
61 for (i, &id) in neighbors.iter().enumerate() {
62 println!(" {}: node {}", i + 1, id);
63 }
64
65 Ok(())
66}More examples
examples/perf_test.rs (line 57)
7fn main() -> Result<(), DiskAnnError> {
8 const NUM_VECTORS: usize = 1_000_000;
9 const DIM: usize = 1536;
10 const MAX_DEGREE: usize = 32;
11 const BUILD_BEAM_WIDTH: usize = 128;
12 const ALPHA: f32 = 1.2;
13 let distance_metric = DistanceMetric::Cosine;
14
15 let singlefile_path = "diskann_large.db";
16
17 // Build if missing
18 if !std::path::Path::new(singlefile_path).exists() {
19 println!(
20 "Building DiskANN index with {} vectors, dim={}, distance={:?}",
21 NUM_VECTORS, DIM, distance_metric
22 );
23
24 // Generate vectors
25 println!("Generating vectors...");
26 let mut rng = thread_rng();
27 let mut vectors = Vec::new();
28 for i in 0..NUM_VECTORS {
29 if i % 100_000 == 0 {
30 println!(" Generated {} vectors...", i);
31 }
32 let v: Vec<f32> = (0..DIM).map(|_| rng.gen()).collect();
33 vectors.push(v);
34 }
35
36 println!("Starting index build...");
37 let start = Instant::now();
38 let _index = DiskANN::build_index(
39 &vectors,
40 MAX_DEGREE,
41 BUILD_BEAM_WIDTH,
42 ALPHA,
43 distance_metric,
44 singlefile_path,
45 )?;
46 let elapsed = start.elapsed().as_secs_f32();
47 println!("Done building index in {:.2} s", elapsed);
48 } else {
49 println!(
50 "Index file {} already exists, skipping build.",
51 singlefile_path
52 );
53 }
54
55 // Open index
56 let open_start = Instant::now();
57 let index = Arc::new(DiskANN::open_index(singlefile_path)?);
58 let open_time = open_start.elapsed().as_secs_f32();
59 println!(
60 "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
61 index.num_vectors, index.dim, index.distance_metric, open_time
62 );
63
64 // Test memory efficiency with queries
65 let num_queries = 100;
66 let k = 10;
67 let beam_width = 64;
68
69 // Generate query batch
70 println!("\nGenerating {} query vectors...", num_queries);
71 let mut rng = thread_rng();
72 let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(num_queries);
73 for _ in 0..num_queries {
74 let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
75 query_batch.push(q);
76 }
77
78 // Sequential queries to measure individual performance
79 println!("\nRunning sequential queries to measure performance...");
80 let mut times = Vec::new();
81 for (i, query) in query_batch.iter().take(10).enumerate() {
82 let start = Instant::now();
83 let neighbors = index.search(query, k, beam_width);
84 let elapsed = start.elapsed();
85 times.push(elapsed.as_micros());
86 println!("Query {}: found {} neighbors in {:?}", i, neighbors.len(), elapsed);
87 }
88
89 let avg_time = times.iter().sum::<u128>() as f64 / times.len() as f64;
90 println!("Average query time: {:.2} µs", avg_time);
91
92 // Parallel queries to test throughput
93 println!("\nRunning {} queries in parallel...", num_queries);
94 let search_start = Instant::now();
95 let results: Vec<Vec<u32>> = query_batch
96 .par_iter()
97 .map(|query| index.search(query, k, beam_width))
98 .collect();
99 let search_time = search_start.elapsed().as_secs_f32();
100
101 println!("Performed {} queries in {:.2} s", num_queries, search_time);
102 println!("Throughput: {:.2} queries/sec", num_queries as f32 / search_time);
103
104 // Verify all queries returned results
105 let all_valid = results.iter().all(|r| r.len() == k.min(index.num_vectors));
106 println!("All queries returned valid results: {}", all_valid);
107
108 // Memory footprint check
109 println!("\nMemory-mapped index ready. The process should have minimal memory footprint.");
110 println!("You can check memory usage with 'ps aux | grep perf_test' in another terminal.");
111 println!("Press Enter to exit...");
112
113 let mut input = String::new();
114 std::io::stdin().read_line(&mut input)?;
115
116 Ok(())
117}Sourcepub fn search(&self, query: &[f32], k: usize, beam_width: usize) -> Vec<u32>
pub fn search(&self, query: &[f32], k: usize, beam_width: usize) -> Vec<u32>
Searches the index for nearest neighbors using beam search
§Arguments
query- Query vectork- Number of nearest neighbors to returnbeam_width- Beam width for the search (typically 32-128)
§Returns
Returns a vector of node IDs representing the nearest neighbors
Examples found in repository?
examples/demo.rs (line 56)
6fn main() -> Result<(), DiskAnnError> {
7 let singlefile_path = "diskann.db";
8 let num_vectors = 100_000;
9 let dim = 128;
10 let max_degree = 32;
11 let build_beam_width = 128;
12 let alpha = 1.2;
13 let distance_metric = DistanceMetric::Cosine;
14
15 // Build if missing
16 if !std::path::Path::new(singlefile_path).exists() {
17 println!("Building DiskANN index at {singlefile_path}...");
18
19 // Generate sample vectors (in real usage, you'd load your own data)
20 println!("Generating {} sample vectors of dimension {}...", num_vectors, dim);
21 let mut rng = thread_rng();
22 let mut vectors = Vec::new();
23 for _ in 0..num_vectors {
24 let v: Vec<f32> = (0..dim).map(|_| rng.gen()).collect();
25 vectors.push(v);
26 }
27
28 let index = DiskANN::build_index(
29 &vectors,
30 max_degree,
31 build_beam_width,
32 alpha,
33 distance_metric,
34 singlefile_path,
35 )?;
36 println!("Build done. Index contains {} vectors", index.num_vectors);
37 } else {
38 println!("Index file {singlefile_path} already exists, skipping build.");
39 }
40
41 // Open the index
42 let index = Arc::new(DiskANN::open_index(singlefile_path)?);
43 println!(
44 "Opened index: {} vectors, dimension={}, max_degree={}",
45 index.num_vectors, index.dim, index.max_degree
46 );
47
48 // Perform a sample query
49 let mut rng = thread_rng();
50 let query: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
51 let k = 10;
52 let search_beam_width = 64;
53
54 println!("\nSearching for {} nearest neighbors with beam_width={}...", k, search_beam_width);
55 let start = std::time::Instant::now();
56 let neighbors = index.search(&query, k, search_beam_width);
57 let elapsed = start.elapsed();
58
59 println!("Search completed in {:?}", elapsed);
60 println!("Found {} neighbors:", neighbors.len());
61 for (i, &id) in neighbors.iter().enumerate() {
62 println!(" {}: node {}", i + 1, id);
63 }
64
65 Ok(())
66}More examples
examples/perf_test.rs (line 83)
7fn main() -> Result<(), DiskAnnError> {
8 const NUM_VECTORS: usize = 1_000_000;
9 const DIM: usize = 1536;
10 const MAX_DEGREE: usize = 32;
11 const BUILD_BEAM_WIDTH: usize = 128;
12 const ALPHA: f32 = 1.2;
13 let distance_metric = DistanceMetric::Cosine;
14
15 let singlefile_path = "diskann_large.db";
16
17 // Build if missing
18 if !std::path::Path::new(singlefile_path).exists() {
19 println!(
20 "Building DiskANN index with {} vectors, dim={}, distance={:?}",
21 NUM_VECTORS, DIM, distance_metric
22 );
23
24 // Generate vectors
25 println!("Generating vectors...");
26 let mut rng = thread_rng();
27 let mut vectors = Vec::new();
28 for i in 0..NUM_VECTORS {
29 if i % 100_000 == 0 {
30 println!(" Generated {} vectors...", i);
31 }
32 let v: Vec<f32> = (0..DIM).map(|_| rng.gen()).collect();
33 vectors.push(v);
34 }
35
36 println!("Starting index build...");
37 let start = Instant::now();
38 let _index = DiskANN::build_index(
39 &vectors,
40 MAX_DEGREE,
41 BUILD_BEAM_WIDTH,
42 ALPHA,
43 distance_metric,
44 singlefile_path,
45 )?;
46 let elapsed = start.elapsed().as_secs_f32();
47 println!("Done building index in {:.2} s", elapsed);
48 } else {
49 println!(
50 "Index file {} already exists, skipping build.",
51 singlefile_path
52 );
53 }
54
55 // Open index
56 let open_start = Instant::now();
57 let index = Arc::new(DiskANN::open_index(singlefile_path)?);
58 let open_time = open_start.elapsed().as_secs_f32();
59 println!(
60 "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
61 index.num_vectors, index.dim, index.distance_metric, open_time
62 );
63
64 // Test memory efficiency with queries
65 let num_queries = 100;
66 let k = 10;
67 let beam_width = 64;
68
69 // Generate query batch
70 println!("\nGenerating {} query vectors...", num_queries);
71 let mut rng = thread_rng();
72 let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(num_queries);
73 for _ in 0..num_queries {
74 let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
75 query_batch.push(q);
76 }
77
78 // Sequential queries to measure individual performance
79 println!("\nRunning sequential queries to measure performance...");
80 let mut times = Vec::new();
81 for (i, query) in query_batch.iter().take(10).enumerate() {
82 let start = Instant::now();
83 let neighbors = index.search(query, k, beam_width);
84 let elapsed = start.elapsed();
85 times.push(elapsed.as_micros());
86 println!("Query {}: found {} neighbors in {:?}", i, neighbors.len(), elapsed);
87 }
88
89 let avg_time = times.iter().sum::<u128>() as f64 / times.len() as f64;
90 println!("Average query time: {:.2} µs", avg_time);
91
92 // Parallel queries to test throughput
93 println!("\nRunning {} queries in parallel...", num_queries);
94 let search_start = Instant::now();
95 let results: Vec<Vec<u32>> = query_batch
96 .par_iter()
97 .map(|query| index.search(query, k, beam_width))
98 .collect();
99 let search_time = search_start.elapsed().as_secs_f32();
100
101 println!("Performed {} queries in {:.2} s", num_queries, search_time);
102 println!("Throughput: {:.2} queries/sec", num_queries as f32 / search_time);
103
104 // Verify all queries returned results
105 let all_valid = results.iter().all(|r| r.len() == k.min(index.num_vectors));
106 println!("All queries returned valid results: {}", all_valid);
107
108 // Memory footprint check
109 println!("\nMemory-mapped index ready. The process should have minimal memory footprint.");
110 println!("You can check memory usage with 'ps aux | grep perf_test' in another terminal.");
111 println!("Press Enter to exit...");
112
113 let mut input = String::new();
114 std::io::stdin().read_line(&mut input)?;
115
116 Ok(())
117}Sourcepub fn get_vector(&self, idx: usize) -> Vec<f32>
pub fn get_vector(&self, idx: usize) -> Vec<f32>
Gets a vector from the index (for testing)
Auto Trait Implementations§
impl Freeze for DiskANN
impl RefUnwindSafe for DiskANN
impl Send for DiskANN
impl Sync for DiskANN
impl Unpin for DiskANN
impl UnwindSafe for DiskANN
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more