velesdb-core 1.7.0

High-performance vector database engine written in Rust
Documentation
//! Native HNSW inner implementation - replaces `hnsw_rs` dependency.
//!
//! This module provides `NativeHnswInner`, a drop-in replacement for `HnswInner`
//! that uses our native HNSW implementation instead of the `hnsw_rs` crate.

// Temporarily allow dead_code until integration into HnswIndex
#![allow(dead_code)]
#![allow(clippy::cast_precision_loss)]

use super::native::{CachedSimdDistance, NativeHnsw, NativeNeighbour};
use crate::distance::DistanceMetric;
use std::path::Path;

/// Native HNSW index wrapper to handle different distance metrics.
///
/// This is the native equivalent of `HnswInner`, using our own HNSW implementation
/// instead of `hnsw_rs`. It provides the same API for seamless integration.
pub struct NativeHnswInner {
    /// The underlying native HNSW index (cached fn pointers for zero-dispatch)
    inner: NativeHnsw<CachedSimdDistance>,
    /// The distance metric used
    metric: DistanceMetric,
}

impl NativeHnswInner {
    /// Creates a new `NativeHnswInner` with the specified metric and parameters.
    ///
    /// # Errors
    ///
    /// Returns an error if vector storage pre-allocation fails.
    pub fn new(
        metric: DistanceMetric,
        max_connections: usize,
        max_elements: usize,
        ef_construction: usize,
        dimension: usize,
    ) -> crate::error::Result<Self> {
        let distance = CachedSimdDistance::new(metric, dimension);
        let inner = if dimension > 0 {
            NativeHnsw::new_with_dimension(
                distance,
                max_connections,
                ef_construction,
                max_elements,
                dimension,
            )?
        } else {
            NativeHnsw::new(distance, max_connections, ef_construction, max_elements)
        };

        Ok(Self { inner, metric })
    }

    /// Searches the HNSW graph and returns raw neighbors with distances.
    #[inline]
    #[must_use]
    pub fn search(&self, query: &[f32], k: usize, ef_search: usize) -> Vec<NativeNeighbour> {
        self.inner.search_neighbours(query, k, ef_search)
    }

    /// Inserts a single vector into the HNSW graph.
    ///
    /// The caller supplies `(vector, expected_idx)` where `expected_idx` is the
    /// internal index pre-registered in `ShardedMappings`. The native graph
    /// auto-assigns sequential node IDs; this method verifies that the assigned
    /// ID matches the expected index to detect mapping desynchronisation early.
    ///
    /// # Errors
    ///
    /// Returns an error if allocation, insertion, or ID-mapping consistency fails.
    pub fn insert(&self, data: (&[f32], usize)) -> crate::error::Result<usize> {
        let (vector, expected_idx) = data;
        let assigned_id = self.inner.insert(vector)?;
        if assigned_id != expected_idx {
            tracing::warn!(
                "NativeHnsw node_id mismatch: expected {expected_idx}, got {assigned_id} \
                 — mapping may be desynchronised under concurrent inserts"
            );
        }
        Ok(assigned_id)
    }

    /// Parallel batch insert into the HNSW graph.
    ///
    /// # Errors
    ///
    /// Returns an error if any insertion fails.
    pub fn parallel_insert(&self, data: &[(&[f32], usize)]) -> crate::error::Result<()> {
        self.inner.parallel_insert(data)
    }

    /// Sets the index to searching mode after bulk insertions.
    ///
    /// Note: This is a no-op for our native implementation.
    pub fn set_searching_mode(&mut self, mode: bool) {
        self.inner.set_searching_mode(mode);
    }

    /// Dumps the HNSW graph to files for persistence.
    ///
    /// # Errors
    ///
    /// Returns `io::Error` if file operations fail.
    pub fn file_dump(&self, path: &Path, basename: &str) -> std::io::Result<()> {
        self.inner.file_dump(path, basename)
    }

    /// Loads the HNSW graph from files.
    ///
    /// # Errors
    ///
    /// Returns `io::Error` if file operations fail or data is corrupted.
    pub fn file_load(
        path: &Path,
        basename: &str,
        metric: DistanceMetric,
        dimension: usize,
    ) -> std::io::Result<Self> {
        let distance = CachedSimdDistance::new(metric, dimension);
        let inner = NativeHnsw::file_load(path, basename, distance)?;

        Ok(Self { inner, metric })
    }

    /// Transforms raw HNSW distance to the appropriate score based on metric type.
    ///
    /// - **Cosine**: `(1.0 - distance).clamp(0.0, 1.0)` (similarity in `[0,1]`)
    /// - **Euclidean**/**Hamming**/**Jaccard**: raw distance (lower is better)
    /// - **`DotProduct`**: `-distance` (negated for consistency)
    #[inline]
    #[must_use]
    pub fn transform_score(&self, raw_distance: f32) -> f32 {
        self.inner.transform_score(raw_distance)
    }

    /// Returns the number of elements in the index.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns true if the index is empty.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns the distance metric used by this index.
    #[inline]
    #[must_use]
    pub fn metric(&self) -> DistanceMetric {
        self.metric
    }

    /// Computes the distance between two vectors using the index's distance metric.
    ///
    /// This is useful for brute-force search where we need to compute distances
    /// outside of the HNSW graph traversal.
    #[inline]
    #[must_use]
    pub fn compute_distance(&self, a: &[f32], b: &[f32]) -> f32 {
        self.inner.compute_distance(a, b)
    }

    /// Executes a closure with zero-copy access to the contiguous vector storage.
    ///
    /// F-05: Enables zero-copy reranking by providing direct `&[f32]` slices
    /// from `ContiguousVectors` instead of cloning via `ShardedVectors::get()`.
    ///
    /// Returns `R::default()` if vector storage is not yet initialized.
    #[inline]
    pub fn with_contiguous_vectors<R: Default>(
        &self,
        f: impl FnOnce(&crate::perf_optimizations::ContiguousVectors) -> R,
    ) -> R {
        self.inner.with_vectors_read(f)
    }
}

// ============================================================================
// Send + Sync for thread safety
// ============================================================================

// SAFETY: `NativeHnswInner` is `Send` because ownership transfer preserves invariants.
// - Condition 1: Internal mutability is synchronized via `parking_lot::RwLock`/atomics.
// - Condition 2: No thread-affine resources are stored in the wrapper.
// Reason: Moving the index wrapper between threads is sound.
unsafe impl Send for NativeHnswInner {}
// SAFETY: `NativeHnswInner` is `Sync` because shared references are concurrency-safe.
// - Condition 1: Concurrent access to mutable graph state is lock/atomic protected.
// - Condition 2: Exposed APIs do not bypass synchronization primitives.
// Reason: `&NativeHnswInner` can be shared safely across threads.
unsafe impl Sync for NativeHnswInner {}

// ============================================================================
// Tests moved to native_inner_tests.rs per project rules