diskann-benchmark-core 0.47.0

DiskANN is a fast approximate nearest neighbor search library for high dimensional data
Documentation
/*
 * Copyright (c) Microsoft Corporation.
 * Licensed under the MIT license.
 */

//! # Shareable Infrastructure for Benchmarking Vector Indexing
//!
//! The purpose of this crate is to create abstractions and implementations for benchmarking
//! DiskANN vector indexing operations. We try to facilitate infrastructure that can be
//! shared across a range of [`diskann::provider::DataProvider`]s with stable APIs to enable:
//!
//! * A tight benchmarking loop for developers performing performance optimization.
//! * Creating standalone binaries for CI benchmarking jobs.
//! * Shared infrastructure to facilitate developing new providers.
//!
//! # Algorithms
//!
//! - [`build`]: Tools for running parallelized index builds.
//!   - [`build::graph`]: Built-in utilities for working with [`diskann::graph::DiskANNIndex`].
//!
//! - [`search`]: Tools for running parallelized search operations.
//!   - [`search::graph`]: Built-in utilities for working with [`diskann::graph::DiskANNIndex`].
//!
//! - [`streaming`]: Tools for running streaming workloads consisting of inserts, deletes,
//!   replaces, searches, etc.
//!   - [`streaming::runbooks`]: Built-in [`streaming::Executor`]s for dynamic operations.
//!     - [`streaming::runbooks::bigann`]: BigANN style runbook support.
//!   - [`streaming::graph`]: Built-in utilities for working with [`diskann::graph::DiskANNIndex`].
//!
//! # Tools
//!
//! - [`recall`]: KNN-Recall and other accuracy measures.
//! - [`tokio`]: Quickly create new [`tokio::runtime::Runtime`]s.
//!
//! # Error Handling
//!
//! Index benchmark operations typically live high in a program's call stack and need to
//! support a wide variety of index implementations and thus error types. To that end,
//! [`anyhow::Error`] is typically used at API boundaries. While this does hide the ways
//! in which a method can fail, the [`anyhow::Error`] type balances generality and fidelity.

mod internal;
pub(crate) mod utils;

// Public Utility Modules
pub mod recall;
pub mod tokio;

// Algorithms
pub mod build;
pub mod search;
pub mod streaming;

/// # Notes on Testing
///
/// Some components in this framework (e.g. runbook parsing) have UX tests to report errors
/// encountered during parsing. The necessary input files and expected outputs are checked
/// in to the repo in the `tests` directory.
///
/// If an error message changes, the expected results can generally be regenerated by
/// running the test suite with the environment variable
/// ```text
/// DISKANN_BENCHMARK_TEST=overwrite
/// ```
/// set. `overwrite` is the only accepted value: any other value makes `should_overwrite`
/// panic instead of being treated as "unset".
#[cfg(test)]
mod ux {
    use std::env::VarError;

    /// Name of the environment variable that requests regeneration of expected outputs.
    const ENV_VAR: &str = "DISKANN_BENCHMARK_TEST";

    /// Check if we should overwrite expected files.
    ///
    /// Returns `true` when the environment variable named by `ENV_VAR` is set to
    /// `overwrite`, and `false` when it is not set at all.
    ///
    /// # Panics
    ///
    /// Panics if the variable is set to any value other than `overwrite`, or if its value
    /// is not valid unicode.
    pub(crate) fn should_overwrite() -> bool {
        match std::env::var(ENV_VAR) {
            Ok(value) if value == "overwrite" => true,
            Ok(value) => panic!(
                "Unknown value for {}: \"{}\". Expected \"overwrite\"",
                ENV_VAR, value
            ),
            Err(VarError::NotPresent) => false,
            Err(VarError::NotUnicode(_)) => panic!("Value for {} is not unicode", ENV_VAR),
        }
    }

    /// Return the `NAME=value` assignment that makes [`should_overwrite`] return `true`,
    /// i.e. `DISKANN_BENCHMARK_TEST=overwrite`.
    ///
    /// NOTE(review): presumably meant for test failure messages that tell the developer
    /// how to regenerate expected outputs; callers are outside this module.
    pub(crate) fn help() -> String {
        format!("{}=overwrite", ENV_VAR)
    }

    /// Return the path of the `tests` directory located directly inside this crate's
    /// manifest directory (`CARGO_MANIFEST_DIR`, captured at compile time).
    pub(crate) fn test_dir() -> std::path::PathBuf {
        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests")
    }
}