DiskANN

Struct DiskANN 

Source
pub struct DiskANN {
    pub dim: usize,
    pub num_vectors: usize,
    pub max_degree: usize,
    pub distance_metric: DistanceMetric,
    /* private fields */
}
Expand description

Main struct representing a DiskANN index

Fields§

§dim: usize

Dimensionality of vectors in the index

§num_vectors: usize

Number of vectors in the index

§max_degree: usize

Maximum number of edges per node

§distance_metric: DistanceMetric

Distance metric used by this index

Implementations§

Source§

impl DiskANN

Source

pub fn build_index( vectors: &[Vec<f32>], max_degree: usize, build_beam_width: usize, alpha: f32, distance_metric: DistanceMetric, file_path: &str, ) -> Result<Self, DiskAnnError>

Builds a new index from provided vectors

§Arguments
  • vectors - The vectors to index
  • max_degree - Maximum number of edges per node (typically 32-64)
  • build_beam_width - Beam width during construction (typically 128)
  • alpha - Pruning parameter (typically 1.2-2.0)
  • distance_metric - Distance metric to use
  • file_path - Path where the index file will be created
§Returns

Returns Result<DiskANN, DiskAnnError>

Examples found in repository?
examples/demo.rs (lines 28-35)
6fn main() -> Result<(), DiskAnnError> {
7    let singlefile_path = "diskann.db";
8    let num_vectors = 100_000;
9    let dim = 128;
10    let max_degree = 32;
11    let build_beam_width = 128;
12    let alpha = 1.2;
13    let distance_metric = DistanceMetric::Cosine;
14
15    // Build if missing
16    if !std::path::Path::new(singlefile_path).exists() {
17        println!("Building DiskANN index at {singlefile_path}...");
18        
19        // Generate sample vectors (in real usage, you'd load your own data)
20        println!("Generating {} sample vectors of dimension {}...", num_vectors, dim);
21        let mut rng = thread_rng();
22        let mut vectors = Vec::new();
23        for _ in 0..num_vectors {
24            let v: Vec<f32> = (0..dim).map(|_| rng.gen()).collect();
25            vectors.push(v);
26        }
27        
28        let index = DiskANN::build_index(
29            &vectors,
30            max_degree,
31            build_beam_width,
32            alpha,
33            distance_metric,
34            singlefile_path,
35        )?;
36        println!("Build done. Index contains {} vectors", index.num_vectors);
37    } else {
38        println!("Index file {singlefile_path} already exists, skipping build.");
39    }
40
41    // Open the index
42    let index = Arc::new(DiskANN::open_index(singlefile_path)?);
43    println!(
44        "Opened index: {} vectors, dimension={}, max_degree={}",
45        index.num_vectors, index.dim, index.max_degree
46    );
47
48    // Perform a sample query
49    let mut rng = thread_rng();
50    let query: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
51    let k = 10;
52    let search_beam_width = 64;
53    
54    println!("\nSearching for {} nearest neighbors with beam_width={}...", k, search_beam_width);
55    let start = std::time::Instant::now();
56    let neighbors = index.search(&query, k, search_beam_width);
57    let elapsed = start.elapsed();
58    
59    println!("Search completed in {:?}", elapsed);
60    println!("Found {} neighbors:", neighbors.len());
61    for (i, &id) in neighbors.iter().enumerate() {
62        println!("  {}: node {}", i + 1, id);
63    }
64
65    Ok(())
66}
More examples
Hide additional examples
examples/perf_test.rs (lines 38-45)
7fn main() -> Result<(), DiskAnnError> {
8    const NUM_VECTORS: usize = 1_000_000;
9    const DIM: usize = 1536;
10    const MAX_DEGREE: usize = 32;
11    const BUILD_BEAM_WIDTH: usize = 128;
12    const ALPHA: f32 = 1.2;
13    let distance_metric = DistanceMetric::Cosine;
14
15    let singlefile_path = "diskann_large.db";
16
17    // Build if missing
18    if !std::path::Path::new(singlefile_path).exists() {
19        println!(
20            "Building DiskANN index with {} vectors, dim={}, distance={:?}",
21            NUM_VECTORS, DIM, distance_metric
22        );
23        
24        // Generate vectors
25        println!("Generating vectors...");
26        let mut rng = thread_rng();
27        let mut vectors = Vec::new();
28        for i in 0..NUM_VECTORS {
29            if i % 100_000 == 0 {
30                println!("  Generated {} vectors...", i);
31            }
32            let v: Vec<f32> = (0..DIM).map(|_| rng.gen()).collect();
33            vectors.push(v);
34        }
35        
36        println!("Starting index build...");
37        let start = Instant::now();
38        let _index = DiskANN::build_index(
39            &vectors,
40            MAX_DEGREE,
41            BUILD_BEAM_WIDTH,
42            ALPHA,
43            distance_metric,
44            singlefile_path,
45        )?;
46        let elapsed = start.elapsed().as_secs_f32();
47        println!("Done building index in {:.2} s", elapsed);
48    } else {
49        println!(
50            "Index file {} already exists, skipping build.",
51            singlefile_path
52        );
53    }
54
55    // Open index
56    let open_start = Instant::now();
57    let index = Arc::new(DiskANN::open_index(singlefile_path)?);
58    let open_time = open_start.elapsed().as_secs_f32();
59    println!(
60        "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
61        index.num_vectors, index.dim, index.distance_metric, open_time
62    );
63
64    // Test memory efficiency with queries
65    let num_queries = 100;
66    let k = 10;
67    let beam_width = 64;
68
69    // Generate query batch
70    println!("\nGenerating {} query vectors...", num_queries);
71    let mut rng = thread_rng();
72    let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(num_queries);
73    for _ in 0..num_queries {
74        let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
75        query_batch.push(q);
76    }
77
78    // Sequential queries to measure individual performance
79    println!("\nRunning sequential queries to measure performance...");
80    let mut times = Vec::new();
81    for (i, query) in query_batch.iter().take(10).enumerate() {
82        let start = Instant::now();
83        let neighbors = index.search(query, k, beam_width);
84        let elapsed = start.elapsed();
85        times.push(elapsed.as_micros());
86        println!("Query {}: found {} neighbors in {:?}", i, neighbors.len(), elapsed);
87    }
88    
89    let avg_time = times.iter().sum::<u128>() as f64 / times.len() as f64;
90    println!("Average query time: {:.2} µs", avg_time);
91
92    // Parallel queries to test throughput
93    println!("\nRunning {} queries in parallel...", num_queries);
94    let search_start = Instant::now();
95    let results: Vec<Vec<u32>> = query_batch
96        .par_iter()
97        .map(|query| index.search(query, k, beam_width))
98        .collect();
99    let search_time = search_start.elapsed().as_secs_f32();
100    
101    println!("Performed {} queries in {:.2} s", num_queries, search_time);
102    println!("Throughput: {:.2} queries/sec", num_queries as f32 / search_time);
103    
104    // Verify all queries returned results
105    let all_valid = results.iter().all(|r| r.len() == k.min(index.num_vectors));
106    println!("All queries returned valid results: {}", all_valid);
107
108    // Memory footprint check
109    println!("\nMemory-mapped index ready. The process should have minimal memory footprint.");
110    println!("You can check memory usage with 'ps aux | grep perf_test' in another terminal.");
111    println!("Press Enter to exit...");
112    
113    let mut input = String::new();
114    std::io::stdin().read_line(&mut input)?;
115
116    Ok(())
117}
Source

pub fn open_index(path: &str) -> Result<Self, DiskAnnError>

Opens an existing index file

§Arguments
  • path - Path to the index file
§Returns

Returns Result<DiskANN, DiskAnnError>

Examples found in repository?
examples/demo.rs (line 42)
6fn main() -> Result<(), DiskAnnError> {
7    let singlefile_path = "diskann.db";
8    let num_vectors = 100_000;
9    let dim = 128;
10    let max_degree = 32;
11    let build_beam_width = 128;
12    let alpha = 1.2;
13    let distance_metric = DistanceMetric::Cosine;
14
15    // Build if missing
16    if !std::path::Path::new(singlefile_path).exists() {
17        println!("Building DiskANN index at {singlefile_path}...");
18        
19        // Generate sample vectors (in real usage, you'd load your own data)
20        println!("Generating {} sample vectors of dimension {}...", num_vectors, dim);
21        let mut rng = thread_rng();
22        let mut vectors = Vec::new();
23        for _ in 0..num_vectors {
24            let v: Vec<f32> = (0..dim).map(|_| rng.gen()).collect();
25            vectors.push(v);
26        }
27        
28        let index = DiskANN::build_index(
29            &vectors,
30            max_degree,
31            build_beam_width,
32            alpha,
33            distance_metric,
34            singlefile_path,
35        )?;
36        println!("Build done. Index contains {} vectors", index.num_vectors);
37    } else {
38        println!("Index file {singlefile_path} already exists, skipping build.");
39    }
40
41    // Open the index
42    let index = Arc::new(DiskANN::open_index(singlefile_path)?);
43    println!(
44        "Opened index: {} vectors, dimension={}, max_degree={}",
45        index.num_vectors, index.dim, index.max_degree
46    );
47
48    // Perform a sample query
49    let mut rng = thread_rng();
50    let query: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
51    let k = 10;
52    let search_beam_width = 64;
53    
54    println!("\nSearching for {} nearest neighbors with beam_width={}...", k, search_beam_width);
55    let start = std::time::Instant::now();
56    let neighbors = index.search(&query, k, search_beam_width);
57    let elapsed = start.elapsed();
58    
59    println!("Search completed in {:?}", elapsed);
60    println!("Found {} neighbors:", neighbors.len());
61    for (i, &id) in neighbors.iter().enumerate() {
62        println!("  {}: node {}", i + 1, id);
63    }
64
65    Ok(())
66}
More examples
Hide additional examples
examples/perf_test.rs (line 57)
7fn main() -> Result<(), DiskAnnError> {
8    const NUM_VECTORS: usize = 1_000_000;
9    const DIM: usize = 1536;
10    const MAX_DEGREE: usize = 32;
11    const BUILD_BEAM_WIDTH: usize = 128;
12    const ALPHA: f32 = 1.2;
13    let distance_metric = DistanceMetric::Cosine;
14
15    let singlefile_path = "diskann_large.db";
16
17    // Build if missing
18    if !std::path::Path::new(singlefile_path).exists() {
19        println!(
20            "Building DiskANN index with {} vectors, dim={}, distance={:?}",
21            NUM_VECTORS, DIM, distance_metric
22        );
23        
24        // Generate vectors
25        println!("Generating vectors...");
26        let mut rng = thread_rng();
27        let mut vectors = Vec::new();
28        for i in 0..NUM_VECTORS {
29            if i % 100_000 == 0 {
30                println!("  Generated {} vectors...", i);
31            }
32            let v: Vec<f32> = (0..DIM).map(|_| rng.gen()).collect();
33            vectors.push(v);
34        }
35        
36        println!("Starting index build...");
37        let start = Instant::now();
38        let _index = DiskANN::build_index(
39            &vectors,
40            MAX_DEGREE,
41            BUILD_BEAM_WIDTH,
42            ALPHA,
43            distance_metric,
44            singlefile_path,
45        )?;
46        let elapsed = start.elapsed().as_secs_f32();
47        println!("Done building index in {:.2} s", elapsed);
48    } else {
49        println!(
50            "Index file {} already exists, skipping build.",
51            singlefile_path
52        );
53    }
54
55    // Open index
56    let open_start = Instant::now();
57    let index = Arc::new(DiskANN::open_index(singlefile_path)?);
58    let open_time = open_start.elapsed().as_secs_f32();
59    println!(
60        "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
61        index.num_vectors, index.dim, index.distance_metric, open_time
62    );
63
64    // Test memory efficiency with queries
65    let num_queries = 100;
66    let k = 10;
67    let beam_width = 64;
68
69    // Generate query batch
70    println!("\nGenerating {} query vectors...", num_queries);
71    let mut rng = thread_rng();
72    let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(num_queries);
73    for _ in 0..num_queries {
74        let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
75        query_batch.push(q);
76    }
77
78    // Sequential queries to measure individual performance
79    println!("\nRunning sequential queries to measure performance...");
80    let mut times = Vec::new();
81    for (i, query) in query_batch.iter().take(10).enumerate() {
82        let start = Instant::now();
83        let neighbors = index.search(query, k, beam_width);
84        let elapsed = start.elapsed();
85        times.push(elapsed.as_micros());
86        println!("Query {}: found {} neighbors in {:?}", i, neighbors.len(), elapsed);
87    }
88    
89    let avg_time = times.iter().sum::<u128>() as f64 / times.len() as f64;
90    println!("Average query time: {:.2} µs", avg_time);
91
92    // Parallel queries to test throughput
93    println!("\nRunning {} queries in parallel...", num_queries);
94    let search_start = Instant::now();
95    let results: Vec<Vec<u32>> = query_batch
96        .par_iter()
97        .map(|query| index.search(query, k, beam_width))
98        .collect();
99    let search_time = search_start.elapsed().as_secs_f32();
100    
101    println!("Performed {} queries in {:.2} s", num_queries, search_time);
102    println!("Throughput: {:.2} queries/sec", num_queries as f32 / search_time);
103    
104    // Verify all queries returned results
105    let all_valid = results.iter().all(|r| r.len() == k.min(index.num_vectors));
106    println!("All queries returned valid results: {}", all_valid);
107
108    // Memory footprint check
109    println!("\nMemory-mapped index ready. The process should have minimal memory footprint.");
110    println!("You can check memory usage with 'ps aux | grep perf_test' in another terminal.");
111    println!("Press Enter to exit...");
112    
113    let mut input = String::new();
114    std::io::stdin().read_line(&mut input)?;
115
116    Ok(())
117}
Source

pub fn search(&self, query: &[f32], k: usize, beam_width: usize) -> Vec<u32>

Searches the index for nearest neighbors using beam search

§Arguments
  • query - Query vector
  • k - Number of nearest neighbors to return
  • beam_width - Beam width for the search (typically 32-128)
§Returns

Returns a vector of node IDs representing the nearest neighbors

Examples found in repository?
examples/demo.rs (line 56)
6fn main() -> Result<(), DiskAnnError> {
7    let singlefile_path = "diskann.db";
8    let num_vectors = 100_000;
9    let dim = 128;
10    let max_degree = 32;
11    let build_beam_width = 128;
12    let alpha = 1.2;
13    let distance_metric = DistanceMetric::Cosine;
14
15    // Build if missing
16    if !std::path::Path::new(singlefile_path).exists() {
17        println!("Building DiskANN index at {singlefile_path}...");
18        
19        // Generate sample vectors (in real usage, you'd load your own data)
20        println!("Generating {} sample vectors of dimension {}...", num_vectors, dim);
21        let mut rng = thread_rng();
22        let mut vectors = Vec::new();
23        for _ in 0..num_vectors {
24            let v: Vec<f32> = (0..dim).map(|_| rng.gen()).collect();
25            vectors.push(v);
26        }
27        
28        let index = DiskANN::build_index(
29            &vectors,
30            max_degree,
31            build_beam_width,
32            alpha,
33            distance_metric,
34            singlefile_path,
35        )?;
36        println!("Build done. Index contains {} vectors", index.num_vectors);
37    } else {
38        println!("Index file {singlefile_path} already exists, skipping build.");
39    }
40
41    // Open the index
42    let index = Arc::new(DiskANN::open_index(singlefile_path)?);
43    println!(
44        "Opened index: {} vectors, dimension={}, max_degree={}",
45        index.num_vectors, index.dim, index.max_degree
46    );
47
48    // Perform a sample query
49    let mut rng = thread_rng();
50    let query: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
51    let k = 10;
52    let search_beam_width = 64;
53    
54    println!("\nSearching for {} nearest neighbors with beam_width={}...", k, search_beam_width);
55    let start = std::time::Instant::now();
56    let neighbors = index.search(&query, k, search_beam_width);
57    let elapsed = start.elapsed();
58    
59    println!("Search completed in {:?}", elapsed);
60    println!("Found {} neighbors:", neighbors.len());
61    for (i, &id) in neighbors.iter().enumerate() {
62        println!("  {}: node {}", i + 1, id);
63    }
64
65    Ok(())
66}
More examples
Hide additional examples
examples/perf_test.rs (line 83)
7fn main() -> Result<(), DiskAnnError> {
8    const NUM_VECTORS: usize = 1_000_000;
9    const DIM: usize = 1536;
10    const MAX_DEGREE: usize = 32;
11    const BUILD_BEAM_WIDTH: usize = 128;
12    const ALPHA: f32 = 1.2;
13    let distance_metric = DistanceMetric::Cosine;
14
15    let singlefile_path = "diskann_large.db";
16
17    // Build if missing
18    if !std::path::Path::new(singlefile_path).exists() {
19        println!(
20            "Building DiskANN index with {} vectors, dim={}, distance={:?}",
21            NUM_VECTORS, DIM, distance_metric
22        );
23        
24        // Generate vectors
25        println!("Generating vectors...");
26        let mut rng = thread_rng();
27        let mut vectors = Vec::new();
28        for i in 0..NUM_VECTORS {
29            if i % 100_000 == 0 {
30                println!("  Generated {} vectors...", i);
31            }
32            let v: Vec<f32> = (0..DIM).map(|_| rng.gen()).collect();
33            vectors.push(v);
34        }
35        
36        println!("Starting index build...");
37        let start = Instant::now();
38        let _index = DiskANN::build_index(
39            &vectors,
40            MAX_DEGREE,
41            BUILD_BEAM_WIDTH,
42            ALPHA,
43            distance_metric,
44            singlefile_path,
45        )?;
46        let elapsed = start.elapsed().as_secs_f32();
47        println!("Done building index in {:.2} s", elapsed);
48    } else {
49        println!(
50            "Index file {} already exists, skipping build.",
51            singlefile_path
52        );
53    }
54
55    // Open index
56    let open_start = Instant::now();
57    let index = Arc::new(DiskANN::open_index(singlefile_path)?);
58    let open_time = open_start.elapsed().as_secs_f32();
59    println!(
60        "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
61        index.num_vectors, index.dim, index.distance_metric, open_time
62    );
63
64    // Test memory efficiency with queries
65    let num_queries = 100;
66    let k = 10;
67    let beam_width = 64;
68
69    // Generate query batch
70    println!("\nGenerating {} query vectors...", num_queries);
71    let mut rng = thread_rng();
72    let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(num_queries);
73    for _ in 0..num_queries {
74        let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
75        query_batch.push(q);
76    }
77
78    // Sequential queries to measure individual performance
79    println!("\nRunning sequential queries to measure performance...");
80    let mut times = Vec::new();
81    for (i, query) in query_batch.iter().take(10).enumerate() {
82        let start = Instant::now();
83        let neighbors = index.search(query, k, beam_width);
84        let elapsed = start.elapsed();
85        times.push(elapsed.as_micros());
86        println!("Query {}: found {} neighbors in {:?}", i, neighbors.len(), elapsed);
87    }
88    
89    let avg_time = times.iter().sum::<u128>() as f64 / times.len() as f64;
90    println!("Average query time: {:.2} µs", avg_time);
91
92    // Parallel queries to test throughput
93    println!("\nRunning {} queries in parallel...", num_queries);
94    let search_start = Instant::now();
95    let results: Vec<Vec<u32>> = query_batch
96        .par_iter()
97        .map(|query| index.search(query, k, beam_width))
98        .collect();
99    let search_time = search_start.elapsed().as_secs_f32();
100    
101    println!("Performed {} queries in {:.2} s", num_queries, search_time);
102    println!("Throughput: {:.2} queries/sec", num_queries as f32 / search_time);
103    
104    // Verify all queries returned results
105    let all_valid = results.iter().all(|r| r.len() == k.min(index.num_vectors));
106    println!("All queries returned valid results: {}", all_valid);
107
108    // Memory footprint check
109    println!("\nMemory-mapped index ready. The process should have minimal memory footprint.");
110    println!("You can check memory usage with 'ps aux | grep perf_test' in another terminal.");
111    println!("Press Enter to exit...");
112    
113    let mut input = String::new();
114    std::io::stdin().read_line(&mut input)?;
115
116    Ok(())
117}
Source

pub fn get_vector(&self, idx: usize) -> Vec<f32>

Gets a vector from the index (for testing)

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V