Struct SingleFileDiskANN

Source
pub struct SingleFileDiskANN {
    pub dim: usize,
    pub num_vectors: usize,
    pub max_degree: usize,
    pub fraction_top: f64,
    pub fraction_mid: f64,
    pub distance_metric: DistanceMetric,
    /* private fields */
}
Expand description

Main struct representing a DiskANN index stored in a single file

Fields§

§dim: usize

Dimensionality of vectors in the index

§num_vectors: usize

Number of vectors in the index

§max_degree: usize

Maximum number of edges per node

§fraction_top: f64

Fraction of vectors in the top layer

§fraction_mid: f64

Fraction of vectors in the middle layer

§distance_metric: DistanceMetric

Distance metric used by this index

Implementations§

Source§

impl SingleFileDiskANN

Source

pub fn build_index_singlefile( num_vectors: usize, dim: usize, max_degree: usize, fraction_top: f64, fraction_mid: f64, distance_metric: DistanceMetric, singlefile_path: &str, ) -> Result<Self, DiskAnnError>

Builds a new single-file index with the specified parameters

§Arguments
  • num_vectors - Number of vectors to store
  • dim - Dimensionality of the vectors
  • max_degree - Maximum number of edges per node
  • fraction_top - Fraction of vectors in the top layer
  • fraction_mid - Fraction of vectors in the middle layer
  • distance_metric - Distance metric to use
  • singlefile_path - Path where the index file will be created
§Returns

Returns Result<SingleFileDiskANN, DiskAnnError>: the newly built index on success, or a DiskAnnError if the build fails

Examples found in repository?
examples/demo.rs (lines 17-25)
5fn main() -> Result<(), DiskAnnError> {
6    let singlefile_path = "diskann.db";
7    let num_vectors = 100_000;
8    let dim = 128;
9    let max_degree = 32;
10    let fraction_top = 0.01;
11    let fraction_mid = 0.1;
12    let distance_metric = DistanceMetric::Cosine;
13
14    // Build if missing
15    if !std::path::Path::new(singlefile_path).exists() {
16        println!("Building single-file diskann at {singlefile_path}...");
17        let index = SingleFileDiskANN::build_index_singlefile(
18            num_vectors,
19            dim,
20            max_degree,
21            fraction_top,
22            fraction_mid,
23            distance_metric,
24            singlefile_path,
25        )?;
26        println!("Build done. Index dimension = {}", index.dim);
27    } else {
28        println!("Index file {singlefile_path} already exists, skipping build.");
29    }
30
31    // Open
32    let index = Arc::new(SingleFileDiskANN::open_index_singlefile(singlefile_path)?);
33
34    // Query
35    let query = vec![0.1, 0.2, 0.3 /* ... up to dim */];
36    let k = 10;
37    let beam_width = 64;
38    let neighbors = index.search(&query, k, beam_width);
39    println!("Neighbors for the sample query = {:?}", neighbors);
40
41    Ok(())
42}
More examples
Hide additional examples
examples/perf_test.rs (lines 24-32)
7fn main() -> Result<(), DiskAnnError> {
8    const NUM_VECTORS: usize = 1_000_000;
9    const DIM: usize = 1536;
10    const MAX_DEGREE: usize = 32;
11    const FRACTION_TOP: f64 = 0.01;
12    const FRACTION_MID: f64 = 0.1;
13    let distance_metric = DistanceMetric::Cosine;
14
15    let singlefile_path = "diskann_parallel.db";
16
17    // Build if missing
18    if !std::path::Path::new(singlefile_path).exists() {
19        println!(
20            "Building single-file index with parallel adjacency + distance={:?}",
21            distance_metric
22        );
23        let start = Instant::now();
24        let _index = SingleFileDiskANN::build_index_singlefile(
25            NUM_VECTORS,
26            DIM,
27            MAX_DEGREE,
28            FRACTION_TOP,
29            FRACTION_MID,
30            distance_metric,
31            singlefile_path,
32        )?;
33        let elapsed = start.elapsed().as_secs_f32();
34        println!("Done building index in {:.2} s", elapsed);
35    } else {
36        println!(
37            "Index file {} already exists, skipping build.",
38            singlefile_path
39        );
40    }
41
42    // open
43    let open_start = Instant::now();
44    let index = Arc::new(SingleFileDiskANN::open_index_singlefile(singlefile_path)?);
45    let open_time = open_start.elapsed().as_secs_f32();
46    println!(
47        "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
48        index.num_vectors, index.dim, index.distance_metric, open_time
49    );
50
51    // Create queries
52    let queries = 5;
53    let k = 10;
54    let beam_width = 64;
55
56    // Generate all queries in a batch
57    let mut rng = rand::thread_rng();
58    let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(queries);
59    for _ in 0..queries {
60        let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
61        query_batch.push(q);
62    }
63
64    // Now run queries in parallel
65    let search_start = Instant::now();
66    query_batch.par_iter().enumerate().for_each(|(i, query)| {
67        let neighbors = index.search(query, k, beam_width);
68        println!("Query {i} => top-{k} neighbors = {:?}", neighbors);
69    });
70    let search_time = search_start.elapsed().as_secs_f32();
71    println!("Performed {queries} queries in {:.2} s", search_time);
72
73    Ok(())
74}
Source

pub fn open_index_singlefile(path: &str) -> Result<Self, DiskAnnError>

Opens an existing index file

§Arguments
  • path - Path to the index file
§Returns

Returns Result<SingleFileDiskANN, DiskAnnError>: the opened index on success, or a DiskAnnError if the file cannot be opened as an index

Examples found in repository?
examples/demo.rs (line 32)
5fn main() -> Result<(), DiskAnnError> {
6    let singlefile_path = "diskann.db";
7    let num_vectors = 100_000;
8    let dim = 128;
9    let max_degree = 32;
10    let fraction_top = 0.01;
11    let fraction_mid = 0.1;
12    let distance_metric = DistanceMetric::Cosine;
13
14    // Build if missing
15    if !std::path::Path::new(singlefile_path).exists() {
16        println!("Building single-file diskann at {singlefile_path}...");
17        let index = SingleFileDiskANN::build_index_singlefile(
18            num_vectors,
19            dim,
20            max_degree,
21            fraction_top,
22            fraction_mid,
23            distance_metric,
24            singlefile_path,
25        )?;
26        println!("Build done. Index dimension = {}", index.dim);
27    } else {
28        println!("Index file {singlefile_path} already exists, skipping build.");
29    }
30
31    // Open
32    let index = Arc::new(SingleFileDiskANN::open_index_singlefile(singlefile_path)?);
33
34    // Query
35    let query = vec![0.1, 0.2, 0.3 /* ... up to dim */];
36    let k = 10;
37    let beam_width = 64;
38    let neighbors = index.search(&query, k, beam_width);
39    println!("Neighbors for the sample query = {:?}", neighbors);
40
41    Ok(())
42}
More examples
Hide additional examples
examples/perf_test.rs (line 44)
7fn main() -> Result<(), DiskAnnError> {
8    const NUM_VECTORS: usize = 1_000_000;
9    const DIM: usize = 1536;
10    const MAX_DEGREE: usize = 32;
11    const FRACTION_TOP: f64 = 0.01;
12    const FRACTION_MID: f64 = 0.1;
13    let distance_metric = DistanceMetric::Cosine;
14
15    let singlefile_path = "diskann_parallel.db";
16
17    // Build if missing
18    if !std::path::Path::new(singlefile_path).exists() {
19        println!(
20            "Building single-file index with parallel adjacency + distance={:?}",
21            distance_metric
22        );
23        let start = Instant::now();
24        let _index = SingleFileDiskANN::build_index_singlefile(
25            NUM_VECTORS,
26            DIM,
27            MAX_DEGREE,
28            FRACTION_TOP,
29            FRACTION_MID,
30            distance_metric,
31            singlefile_path,
32        )?;
33        let elapsed = start.elapsed().as_secs_f32();
34        println!("Done building index in {:.2} s", elapsed);
35    } else {
36        println!(
37            "Index file {} already exists, skipping build.",
38            singlefile_path
39        );
40    }
41
42    // open
43    let open_start = Instant::now();
44    let index = Arc::new(SingleFileDiskANN::open_index_singlefile(singlefile_path)?);
45    let open_time = open_start.elapsed().as_secs_f32();
46    println!(
47        "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
48        index.num_vectors, index.dim, index.distance_metric, open_time
49    );
50
51    // Create queries
52    let queries = 5;
53    let k = 10;
54    let beam_width = 64;
55
56    // Generate all queries in a batch
57    let mut rng = rand::thread_rng();
58    let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(queries);
59    for _ in 0..queries {
60        let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
61        query_batch.push(q);
62    }
63
64    // Now run queries in parallel
65    let search_start = Instant::now();
66    query_batch.par_iter().enumerate().for_each(|(i, query)| {
67        let neighbors = index.search(query, k, beam_width);
68        println!("Query {i} => top-{k} neighbors = {:?}", neighbors);
69    });
70    let search_time = search_start.elapsed().as_secs_f32();
71    println!("Performed {queries} queries in {:.2} s", search_time);
72
73    Ok(())
74}
Source

pub fn search(&self, query: &[f32], k: usize, beam_width: usize) -> Vec<u32>

Searches the index for nearest neighbors

§Arguments
  • query - Query vector
  • k - Number of nearest neighbors to return
  • beam_width - Beam width for the search
§Returns

Returns a vector of node IDs representing the nearest neighbors

Examples found in repository?
examples/demo.rs (line 38)
5fn main() -> Result<(), DiskAnnError> {
6    let singlefile_path = "diskann.db";
7    let num_vectors = 100_000;
8    let dim = 128;
9    let max_degree = 32;
10    let fraction_top = 0.01;
11    let fraction_mid = 0.1;
12    let distance_metric = DistanceMetric::Cosine;
13
14    // Build if missing
15    if !std::path::Path::new(singlefile_path).exists() {
16        println!("Building single-file diskann at {singlefile_path}...");
17        let index = SingleFileDiskANN::build_index_singlefile(
18            num_vectors,
19            dim,
20            max_degree,
21            fraction_top,
22            fraction_mid,
23            distance_metric,
24            singlefile_path,
25        )?;
26        println!("Build done. Index dimension = {}", index.dim);
27    } else {
28        println!("Index file {singlefile_path} already exists, skipping build.");
29    }
30
31    // Open
32    let index = Arc::new(SingleFileDiskANN::open_index_singlefile(singlefile_path)?);
33
34    // Query
35    let query = vec![0.1, 0.2, 0.3 /* ... up to dim */];
36    let k = 10;
37    let beam_width = 64;
38    let neighbors = index.search(&query, k, beam_width);
39    println!("Neighbors for the sample query = {:?}", neighbors);
40
41    Ok(())
42}
More examples
Hide additional examples
examples/perf_test.rs (line 67)
7fn main() -> Result<(), DiskAnnError> {
8    const NUM_VECTORS: usize = 1_000_000;
9    const DIM: usize = 1536;
10    const MAX_DEGREE: usize = 32;
11    const FRACTION_TOP: f64 = 0.01;
12    const FRACTION_MID: f64 = 0.1;
13    let distance_metric = DistanceMetric::Cosine;
14
15    let singlefile_path = "diskann_parallel.db";
16
17    // Build if missing
18    if !std::path::Path::new(singlefile_path).exists() {
19        println!(
20            "Building single-file index with parallel adjacency + distance={:?}",
21            distance_metric
22        );
23        let start = Instant::now();
24        let _index = SingleFileDiskANN::build_index_singlefile(
25            NUM_VECTORS,
26            DIM,
27            MAX_DEGREE,
28            FRACTION_TOP,
29            FRACTION_MID,
30            distance_metric,
31            singlefile_path,
32        )?;
33        let elapsed = start.elapsed().as_secs_f32();
34        println!("Done building index in {:.2} s", elapsed);
35    } else {
36        println!(
37            "Index file {} already exists, skipping build.",
38            singlefile_path
39        );
40    }
41
42    // open
43    let open_start = Instant::now();
44    let index = Arc::new(SingleFileDiskANN::open_index_singlefile(singlefile_path)?);
45    let open_time = open_start.elapsed().as_secs_f32();
46    println!(
47        "Opened index with {} vectors, dim={}, metric={:?} in {:.2} s",
48        index.num_vectors, index.dim, index.distance_metric, open_time
49    );
50
51    // Create queries
52    let queries = 5;
53    let k = 10;
54    let beam_width = 64;
55
56    // Generate all queries in a batch
57    let mut rng = rand::thread_rng();
58    let mut query_batch: Vec<Vec<f32>> = Vec::with_capacity(queries);
59    for _ in 0..queries {
60        let q: Vec<f32> = (0..index.dim).map(|_| rng.gen()).collect();
61        query_batch.push(q);
62    }
63
64    // Now run queries in parallel
65    let search_start = Instant::now();
66    query_batch.par_iter().enumerate().for_each(|(i, query)| {
67        let neighbors = index.search(query, k, beam_width);
68        println!("Query {i} => top-{k} neighbors = {:?}", neighbors);
69    });
70    let search_time = search_start.elapsed().as_secs_f32();
71    println!("Performed {queries} queries in {:.2} s", search_time);
72
73    Ok(())
74}

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V