vector-index 0.1.0

Generic HNSW vector index with pluggable distance metrics.
Documentation
//! The [`Metric`] trait and built-in implementations.
//!
//! HNSW does not require a true mathematical metric — only a consistent
//! ordering on pairwise distances. In practice we still call the trait
//! `Metric` because every interesting impl (L², cosine-as-distance, SW₁)
//! is at least a pseudometric, and the name is what users expect.
//!
//! # Implementing your own metric
//!
//! ```
//! use vector_index::Metric;
//!
//! struct Hamming;
//!
//! impl Metric for Hamming {
//!     type Point = Vec<u8>;
//!
//!     fn distance(&self, a: &Self::Point, b: &Self::Point) -> f32 {
//!         a.iter().zip(b).filter(|(x, y)| x != y).count() as f32
//!     }
//!
//!     fn dim(&self, point: &Self::Point) -> usize {
//!         point.len()
//!     }
//! }
//! ```
//!
//! Implementations should be deterministic and total: `distance(a, b)` must
//! return the same finite, non-negative `f32` for the same inputs. NaN or
//! negative outputs will produce undefined ordering behavior in HNSW.

/// A distance function over points of an associated type.
///
/// Metrics are passed by reference into search and insert paths, so they
/// should be cheap to dereference. Stateful metrics (e.g. SW₁ with cached
/// random projections) typically hold their state behind an `Arc` or
/// inside the metric struct itself.
///
/// # Required properties
///
/// - **Determinism**: `distance(a, b)` returns the same value for the same
///   inputs across calls.
/// - **Non-negativity**: `distance(a, b) >= 0.0` for all valid inputs.
/// - **Finiteness**: the returned value is never NaN or infinite.
///
/// HNSW assumes but does not verify these properties. Violations cause
/// silent index corruption, not panics.
///
/// Symmetry (`d(a,b) == d(b,a)`) and the triangle inequality are *not*
/// required by HNSW, though most useful metrics satisfy them.
pub trait Metric: Send + Sync {
    /// The point type this metric measures distances between.
    type Point: Send + Sync;

    /// Compute the distance between two points.
    fn distance(&self, a: &Self::Point, b: &Self::Point) -> f32;

    /// Return the number of dimensions / components of `point`.
    ///
    /// Used by [`HnswIndex`](crate::HnswIndex) to enforce dimension
    /// consistency: the first inserted point sets the expected dimension,
    /// and subsequent inserts with a mismatched dimension are rejected.
    fn dim(&self, point: &Self::Point) -> usize;
}

// -----------------------------------------------------------------------------
// Built-in metrics
// -----------------------------------------------------------------------------

/// Squared Euclidean distance over `Vec<f32>` / `[f32]` points.
///
/// Returns Σᵢ (aᵢ − bᵢ)² — *not* the square root. For HNSW this is
/// equivalent (monotonic in the true L² distance) and avoids a sqrt
/// per comparison.
///
/// Panics in debug builds if input lengths differ; in release builds,
/// trailing elements of the longer slice are ignored.
#[derive(Debug, Clone, Copy, Default)]
pub struct L2;

impl Metric for L2 {
    type Point = Vec<f32>;

    fn distance(&self, a: &Self::Point, b: &Self::Point) -> f32 {
        a.iter()
            .zip(b.iter())
            .map(|(x, y)| {
                let d = x - y;
                d * d
            })
            .sum()
    }

    fn dim(&self, point: &Self::Point) -> usize {
        point.len()
    }
}

/// Cosine distance, defined as `1 − cosine_similarity(a, b)`.
///
/// Range is `[0, 2]`. Returns `1.0` if either vector has zero norm
/// (chosen over NaN for HNSW compatibility).
#[derive(Debug, Clone, Copy, Default)]
pub struct Cosine;

impl Metric for Cosine {
    type Point = Vec<f32>;

    fn distance(&self, a: &Self::Point, b: &Self::Point) -> f32 {
        let mut dot = 0.0_f32;
        let mut na = 0.0_f32;
        let mut nb = 0.0_f32;
        for (&x, &y) in a.iter().zip(b.iter()) {
            dot += x * y;
            na += x * x;
            nb += y * y;
        }
        if na == 0.0 || nb == 0.0 {
            return 1.0;
        }
        1.0 - dot / (na.sqrt() * nb.sqrt())
    }

    fn dim(&self, point: &Self::Point) -> usize {
        point.len()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    #[test]
    fn l2_known_values() {
        let m = L2;
        assert_relative_eq!(m.distance(&vec![0.0; 4], &vec![0.0; 4]), 0.0);
        assert_relative_eq!(
            m.distance(&vec![1.0, 0.0, 0.0], &vec![0.0, 1.0, 0.0]),
            2.0 // squared distance: 1² + 1² + 0²
        );
    }

    #[test]
    fn cosine_known_values() {
        let m = Cosine;
        // identical direction → distance 0
        assert_relative_eq!(
            m.distance(&vec![1.0, 0.0], &vec![2.0, 0.0]),
            0.0,
            epsilon = 1e-6
        );
        // orthogonal → distance 1
        assert_relative_eq!(
            m.distance(&vec![1.0, 0.0], &vec![0.0, 1.0]),
            1.0,
            epsilon = 1e-6
        );
        // opposite direction → distance 2
        assert_relative_eq!(
            m.distance(&vec![1.0, 0.0], &vec![-1.0, 0.0]),
            2.0,
            epsilon = 1e-6
        );
    }

    #[test]
    fn cosine_zero_vector_does_not_nan() {
        let m = Cosine;
        let d = m.distance(&vec![0.0, 0.0], &vec![1.0, 1.0]);
        assert!(
            d.is_finite(),
            "cosine of zero-vector should not produce NaN"
        );
    }
}