// vicinity 0.6.2 - Docs.rs

//! ADSampling: adaptive early termination for graph-based ANN search.
//!
//! Implements the distance comparison operation (DCO) from Gao & Long, SIGMOD 2023.
//! Instead of computing the full distance between a query and a candidate, ADSampling
//! evaluates dimensions in batches and applies a statistical test to reject candidates
//! early when partial evidence indicates they are far from the query.
//!
//! The technique is most effective at high dimensionality (D >= 300). At D=960 (GIST),
//! expect 3-4x speedup over exact distance; at D=128 (SIFT), expect ~1.5x.
//!
//! # Preprocessing
//!
//! A random orthogonal rotation is applied to all stored vectors and queries so that
//! the partial sum over the first `d` dimensions, scaled by `D/d`, is an unbiased
//! estimator of the full squared distance. Without rotation, dimensions with unequal
//! variance (e.g., PCA-ordered) bias the estimator.
//!
//! # References
//!
//! Gao, J. & Long, C. (2023). *High-Dimensional Approximate Nearest Neighbor Search:
//! with Reliable and Efficient Distance Comparison Operations.* SIGMOD 2023.

use crate::error::RetrieveError;

/// Tunable knobs for ADSampling search.
#[derive(Debug, Clone)]
pub struct ADSamplingParams {
    /// Confidence coefficient (`epsilon_0`) of the rejection test. Larger values are
    /// more conservative (fewer false rejections, less speedup); smaller values are
    /// more aggressive (faster, slight recall loss). Default: 2.1 (from the paper).
    pub epsilon0: f32,

    /// How many dimensions are accumulated per batch before the threshold is
    /// re-checked. Should align with SIMD width. Default: 32 (8 `f32` lanes x 4).
    pub delta_d: usize,

    /// Seed for the random rotation matrix. The same seed always yields the same
    /// rotation, which keeps results reproducible. Default: 42.
    pub seed: u64,
}

impl Default for ADSamplingParams {
    /// Returns `epsilon0 = 2.1`, `delta_d = 32`, `seed = 42`.
    fn default() -> Self {
        let (epsilon0, delta_d, seed) = (2.1, 32, 42);
        Self {
            epsilon0,
            delta_d,
            seed,
        }
    }
}

/// Precomputed state for ADSampling search.
///
/// Holds the random orthogonal rotation matrix and the rotated copy of the database
/// vectors. The graph index itself is not stored here: it is passed to the `search_*`
/// methods (e.g. an [`HNSWIndex`](crate::hnsw::HNSWIndex)), which use this state to
/// accelerate search via partial distance evaluation.
pub struct ADSamplingState {
    /// Row-major transposed rotation matrix Q^T, D x D.
    /// Stored transposed so that rotate_vector is a row-wise GEMV (contiguous access).
    rotation_t: Vec<f32>,
    /// Rotated database vectors, stored flat (n * D).
    rotated_vectors: Vec<f32>,
    /// Vector dimensionality.
    dimension: usize,
    /// Number of vectors.
    num_vectors: usize,
    /// Parameters.
    params: ADSamplingParams,
    /// Precomputed ratio table with one entry per batch checkpoint
    /// (`ceil(D / delta_d)` entries; entry `i` is tested after batch `i + 1`).
    /// ratio(D, d) = (d/D) * (1 + epsilon0/sqrt(d))^2
    ratio_table: Vec<f32>,
}

impl ADSamplingState {
    /// Build ADSampling state from raw vectors.
    ///
    /// Generates a random orthogonal rotation matrix and rotates all vectors.
    /// This is a one-time O(n * D^2) preprocessing step.
    pub fn new(vectors: &[f32], dimension: usize, params: ADSamplingParams) -> Self {
        let num_vectors = vectors.len() / dimension;
        let rotation = generate_orthogonal_rotation(dimension, params.seed);
        // Transpose Q so rotate_vector can do contiguous row reads + innr::dot.
        let rotation_t = transpose_square(&rotation, dimension);
        let rotated_vectors = rotate_all(vectors, &rotation_t, dimension, num_vectors);

        // Precompute ratio table for each batch checkpoint.
        // Use div_ceil so that dim < delta_d still gets one batch.
        let num_batches = dimension.div_ceil(params.delta_d.max(1));
        let dim_f = dimension as f32;
        let eps = params.epsilon0;
        let mut ratio_table = Vec::with_capacity(num_batches);
        for batch_idx in 1..=num_batches {
            let d = (batch_idx * params.delta_d) as f32;
            let correction = 1.0 + eps / d.sqrt();
            ratio_table.push((d / dim_f) * correction * correction);
        }

        Self {
            rotation_t,
            rotated_vectors,
            dimension,
            num_vectors,
            params,
            ratio_table,
        }
    }

    /// Build ADSampling state from an already-built HNSW index.
    ///
    /// **Prefer this over `new()` whenever the state is paired with `search_hnsw()`.**
    /// `HNSWIndex::build()` reorders vectors for cache locality, and the internal
    /// node IDs handed to the distance callback by `search_with_distance` are
    /// positions in that reordered array rather than insertion order. Reading the
    /// vectors straight from the index keeps both sides in the same order.
    #[cfg(feature = "hnsw")]
    pub fn from_hnsw(index: &crate::hnsw::HNSWIndex, params: ADSamplingParams) -> Self {
        let reordered = index.vectors_raw();
        let dim = index.dimension;
        Self::new(reordered, dim, params)
    }

    /// Build ADSampling state with automatic `delta_d` tuning via spectral analysis.
    ///
    /// Estimates how many dimensions carry signal from a sample of up to 5000
    /// vectors (see `estimate_signal_dimensions`), clamps that count to [16, 64],
    /// rounds it to the nearest multiple of 16 for SIMD alignment, and uses the
    /// result as `delta_d`. All other fields of `params` are passed through.
    ///
    /// When fewer than two whole vectors are available (including the
    /// `dimension == 0` case) no variance can be estimated, so the estimator is
    /// skipped and the minimum `delta_d` of 16 is used.
    ///
    /// Requires the `rmt-spectral` feature.
    #[cfg(feature = "rmt-spectral")]
    pub fn new_auto(vectors: &[f32], dimension: usize, params: ADSamplingParams) -> Self {
        const MIN_DELTA_D: usize = 16;
        const MAX_DELTA_D: usize = 64;
        const ALIGNMENT: usize = 16;
        const MAX_SAMPLE: usize = 5000;

        // `checked_div` avoids a divide-by-zero panic for dimension == 0.
        let num_vectors = vectors.len().checked_div(dimension).unwrap_or(0);

        // Sample up to 5000 vectors for covariance estimation.
        let sample_n = num_vectors.min(MAX_SAMPLE);

        // The sample variance divides by (n - 1); with fewer than two samples it
        // is undefined, so fall back to the smallest allowed batch width.
        let signal_dims = if sample_n >= 2 {
            estimate_signal_dimensions(vectors, dimension, sample_n)
        } else {
            MIN_DELTA_D
        };

        // Clamp to the SIMD-friendly range, then round to the nearest multiple of 16.
        let clamped = signal_dims.clamp(MIN_DELTA_D, MAX_DELTA_D);
        let delta_d = ((clamped + ALIGNMENT / 2) / ALIGNMENT) * ALIGNMENT;

        let tuned_params = ADSamplingParams { delta_d, ..params };
        Self::new(vectors, dimension, tuned_params)
    }

    /// Apply the stored rotation to `query` and return the rotated copy.
    ///
    /// The result is what `dist_comp` expects as its `rotated_query` argument.
    #[must_use]
    pub fn rotate_query(&self, query: &[f32]) -> Vec<f32> {
        let (matrix_t, dim) = (&self.rotation_t, self.dimension);
        rotate_vector(query, matrix_t, dim)
    }

    /// Borrow the rotated database vector stored at internal position `id`.
    ///
    /// Panics if `id` lies outside the stored vectors.
    #[inline]
    fn rotated_vector(&self, id: u32) -> &[f32] {
        let dim = self.dimension;
        let first = id as usize * dim;
        let last = first + dim;
        &self.rotated_vectors[first..last]
    }

    /// Adaptive distance comparison.
    ///
    /// Accumulates the squared L2 distance between `rotated_query` and the stored
    /// rotated candidate one batch of `delta_d` dimensions at a time. After each
    /// batch the partial sum is compared against `threshold^2 * ratio`, and the
    /// candidate is dropped as soon as that bound is reached.
    ///
    /// Returns `Some(exact_distance)` (L2, i.e. square-rooted) if the candidate
    /// survives every checkpoint, or `None` if it is rejected. Rejection before the
    /// last checkpoint is a statistical decision governed by `epsilon0`, not a
    /// proof, so a small fraction of true neighbors can be rejected.
    ///
    /// `threshold` is the current worst distance in the result set (the k-th nearest).
    /// When the result set is not yet full, pass `f32::INFINITY` to disable early exit.
    ///
    /// # Panics
    ///
    /// Panics if `rotated_query` has fewer than `dimension` elements or if
    /// `candidate_id` is outside the stored vectors.
    #[inline]
    pub fn dist_comp(
        &self,
        rotated_query: &[f32],
        candidate_id: u32,
        threshold: f32,
    ) -> Option<f32> {
        let candidate = self.rotated_vector(candidate_id);
        let dim = self.dimension;
        // A zero batch width would never advance through the dimensions and every
        // candidate would come back with distance 0; always step by at least one.
        let delta_d = self.params.delta_d.max(1);

        // The threshold from the caller is in L2 scale (sqrt). Convert to L2²
        // for the partial-sum comparison, since we accumulate squared diffs.
        let threshold_sq = threshold * threshold;
        // A (near-)zero threshold would reject everything, so skip the test then.
        let can_reject = threshold_sq > 1e-10;

        let mut partial_sum: f32 = 0.0;
        let mut offset = 0;

        for (batch_idx, &ratio) in self.ratio_table.iter().enumerate() {
            let end = ((batch_idx + 1) * delta_d).min(dim);
            // Accumulate squared differences for this batch, in index order.
            partial_sum = rotated_query[offset..end]
                .iter()
                .zip(&candidate[offset..end])
                .fold(partial_sum, |acc, (q, c)| {
                    let diff = q - c;
                    acc + diff * diff
                });
            offset = end;

            // Statistical test: if partial L2² exceeds threshold² * ratio, reject.
            if can_reject && partial_sum >= threshold_sq * ratio {
                return None;
            }
        }

        // Return L2 distance (sqrt) to match the scale HNSW graphs are built for.
        Some(partial_sum.sqrt())
    }

    /// Search an HNSW index using ADSampling for accelerated distance computation.
    ///
    /// The index must already be built. Upper-layer navigation uses exact distance;
    /// base-layer beam search uses ADSampling with early termination.
    ///
    /// The query is rotated once, then `index.search_with_distance` is driven with a
    /// callback that evaluates candidates through [`Self::dist_comp`]. Rejected
    /// candidates are reported to the index as `f32::INFINITY`.
    ///
    /// # Errors
    ///
    /// Returns `RetrieveError::InvalidParameter` if the index is not built or if
    /// `query.len()` differs from this state's dimension, `RetrieveError::EmptyIndex`
    /// if the index holds no vectors, and otherwise whatever
    /// `search_with_distance` returns.
    #[cfg(feature = "hnsw")]
    pub fn search_hnsw(
        &self,
        index: &crate::hnsw::HNSWIndex,
        query: &[f32],
        k: usize,
        ef: usize,
    ) -> Result<Vec<(u32, f32)>, RetrieveError> {
        if !index.is_built() {
            return Err(RetrieveError::InvalidParameter(
                "index must be built before search".into(),
            ));
        }
        if index.num_vectors == 0 {
            return Err(RetrieveError::EmptyIndex);
        }
        // Rotating a query of the wrong length would panic or silently truncate.
        if query.len() != self.dimension {
            return Err(RetrieveError::InvalidParameter(
                "query dimension does not match ADSampling state".into(),
            ));
        }

        let rotated_query = self.rotate_query(query);

        // Ascending list of the k smallest accepted distances seen so far. Its last
        // element (once k entries exist) is the rejection threshold. A `RefCell`
        // is needed because the distance callback only gets shared access.
        let top_k = std::cell::RefCell::new(Vec::<f32>::with_capacity(k + 1));

        let dist_fn = |_q: &[f32], node_id: u32| -> f32 {
            // `get(k - 1)` is `Some` exactly when k distances are tracked; k == 0
            // has no k-th entry, so early exit stays disabled instead of underflowing.
            let threshold = k
                .checked_sub(1)
                .and_then(|kth| top_k.borrow().get(kth).copied())
                .unwrap_or(f32::INFINITY);

            match self.dist_comp(&rotated_query, node_id, threshold) {
                Some(exact_dist) => {
                    let mut v = top_k.borrow_mut();
                    let pos = v.partition_point(|&d| d < exact_dist);
                    // Distances that would land past the k-th slot cannot change
                    // the threshold, so they are not stored.
                    if pos < k {
                        v.insert(pos, exact_dist);
                        v.truncate(k);
                    }
                    exact_dist
                }
                None => f32::INFINITY,
            }
        };

        index.search_with_distance(query, k, ef, &dist_fn)
    }

    /// Search a Vamana index using ADSampling.
    ///
    /// The query is rotated once, then `index.search_with_distance` is driven with a
    /// callback that evaluates candidates through [`Self::dist_comp`]. Rejected
    /// candidates are reported to the index as `f32::INFINITY`.
    ///
    /// # Errors
    ///
    /// Returns `RetrieveError::InvalidParameter` if `query.len()` differs from this
    /// state's dimension, and otherwise whatever `search_with_distance` returns.
    #[cfg(feature = "vamana")]
    pub fn search_vamana(
        &self,
        index: &crate::vamana::VamanaIndex,
        query: &[f32],
        k: usize,
        ef: usize,
    ) -> Result<Vec<(u32, f32)>, RetrieveError> {
        // Rotating a query of the wrong length would panic or silently truncate.
        if query.len() != self.dimension {
            return Err(RetrieveError::InvalidParameter(
                "query dimension does not match ADSampling state".into(),
            ));
        }

        let rotated_query = self.rotate_query(query);

        // Ascending list of the k smallest accepted distances seen so far; its last
        // element (once k entries exist) is the rejection threshold.
        let top_k = std::cell::RefCell::new(Vec::<f32>::with_capacity(k + 1));

        let dist_fn = |_q: &[f32], node_id: u32| -> f32 {
            // `get(k - 1)` is `Some` exactly when k distances are tracked; k == 0
            // has no k-th entry, so early exit stays disabled instead of underflowing.
            let threshold = k
                .checked_sub(1)
                .and_then(|kth| top_k.borrow().get(kth).copied())
                .unwrap_or(f32::INFINITY);

            match self.dist_comp(&rotated_query, node_id, threshold) {
                Some(exact_dist) => {
                    let mut v = top_k.borrow_mut();
                    let pos = v.partition_point(|&d| d < exact_dist);
                    // Distances that would land past the k-th slot cannot change
                    // the threshold, so they are not stored.
                    if pos < k {
                        v.insert(pos, exact_dist);
                        v.truncate(k);
                    }
                    exact_dist
                }
                None => f32::INFINITY,
            }
        };

        index.search_with_distance(query, k, ef, &dist_fn)
    }

    /// Search an NSG index using ADSampling.
    ///
    /// The query is rotated once, then `index.search_with_distance` is driven with a
    /// callback that evaluates candidates through [`Self::dist_comp`]. Rejected
    /// candidates are reported to the index as `f32::INFINITY`.
    ///
    /// # Errors
    ///
    /// Returns `RetrieveError::InvalidParameter` if `query.len()` differs from this
    /// state's dimension, and otherwise whatever `search_with_distance` returns.
    #[cfg(feature = "nsg")]
    pub fn search_nsg(
        &self,
        index: &crate::nsg::NsgIndex,
        query: &[f32],
        k: usize,
        ef: usize,
    ) -> Result<Vec<(u32, f32)>, RetrieveError> {
        // Rotating a query of the wrong length would panic or silently truncate.
        if query.len() != self.dimension {
            return Err(RetrieveError::InvalidParameter(
                "query dimension does not match ADSampling state".into(),
            ));
        }

        let rotated_query = self.rotate_query(query);

        // Ascending list of the k smallest accepted distances seen so far; its last
        // element (once k entries exist) is the rejection threshold.
        let top_k = std::cell::RefCell::new(Vec::<f32>::with_capacity(k + 1));

        let dist_fn = |_q: &[f32], node_id: u32| -> f32 {
            // `get(k - 1)` is `Some` exactly when k distances are tracked; k == 0
            // has no k-th entry, so early exit stays disabled instead of underflowing.
            let threshold = k
                .checked_sub(1)
                .and_then(|kth| top_k.borrow().get(kth).copied())
                .unwrap_or(f32::INFINITY);

            match self.dist_comp(&rotated_query, node_id, threshold) {
                Some(exact_dist) => {
                    let mut v = top_k.borrow_mut();
                    let pos = v.partition_point(|&d| d < exact_dist);
                    // Distances that would land past the k-th slot cannot change
                    // the threshold, so they are not stored.
                    if pos < k {
                        v.insert(pos, exact_dist);
                        v.truncate(k);
                    }
                    exact_dist
                }
                None => f32::INFINITY,
            }
        };

        index.search_with_distance(query, k, ef, &dist_fn)
    }

    /// Number of vectors in this ADSampling state.
    ///
    /// This is the count of whole vectors that were rotated at construction time.
    #[must_use]
    pub fn num_vectors(&self) -> usize {
        self.num_vectors
    }

    /// Dimensionality of the vectors.
    ///
    /// Queries passed to `rotate_query` are expected to have this many elements.
    #[must_use]
    pub fn dimension(&self) -> usize {
        self.dimension
    }
}

// ─── Random orthogonal rotation ─────────────────────────────────────────────

/// Build a `dim x dim` random orthogonal matrix (row-major) from `seed`.
///
/// A matrix of Gaussian samples is drawn from [`LcgRng`] in row-major order and its
/// columns are orthonormalized left to right with modified Gram-Schmidt. A column
/// whose norm is not above `1e-10` after orthogonalization is left unnormalized.
fn generate_orthogonal_rotation(dim: usize, seed: u64) -> Vec<f32> {
    // Draw the Gaussian matrix directly into the buffer that becomes Q.
    let mut rng = LcgRng::new(seed);
    let mut q: Vec<f32> = (0..dim * dim)
        .map(|_| rng.next_gaussian() as f32)
        .collect();

    for col in 0..dim {
        // Remove the component along each already-finished column, one at a time.
        for prev in 0..col {
            let projection = (0..dim).fold(0.0f32, |acc, row| {
                acc + q[row * dim + prev] * q[row * dim + col]
            });
            for row in 0..dim {
                q[row * dim + col] -= projection * q[row * dim + prev];
            }
        }

        // Scale the column to unit length unless it has collapsed to ~zero.
        let length = (0..dim)
            .fold(0.0f32, |acc, row| {
                acc + q[row * dim + col] * q[row * dim + col]
            })
            .sqrt();
        if length > 1e-10 {
            for row in 0..dim {
                q[row * dim + col] /= length;
            }
        }
    }

    q
}

/// Return the transpose of a `dim x dim` row-major matrix as a new vector.
///
/// The input is left untouched; element `(i, j)` of the result is element
/// `(j, i)` of `m`.
fn transpose_square(m: &[f32], dim: usize) -> Vec<f32> {
    let mut transposed = Vec::with_capacity(dim * dim);
    for i in 0..dim {
        // Row i of the result is column i of the input.
        transposed.extend((0..dim).map(|j| m[j * dim + i]));
    }
    transposed
}

/// Multiply a row-major `dim x dim` matrix by `v`: `result[i] = dot(rotation_t[i], v)`.
///
/// With the `innr` feature the dot product is delegated to `innr::dot`; otherwise
/// it is a plain left-to-right sum over the first `dim` elements of `v`.
fn rotate_vector(v: &[f32], rotation_t: &[f32], dim: usize) -> Vec<f32> {
    let row_dot = |row: &[f32]| -> f32 {
        #[cfg(feature = "innr")]
        let value = innr::dot(row, v);
        #[cfg(not(feature = "innr"))]
        let value = (0..dim).fold(0.0f32, |acc, j| acc + row[j] * v[j]);
        value
    };

    (0..dim)
        .map(|i| row_dot(&rotation_t[i * dim..(i + 1) * dim]))
        .collect()
}

/// Rotate `n` vectors stored back to back in `vectors`, returning a new flat array.
///
/// Each length-`dim` chunk is multiplied by the row-major matrix `rotation_t`,
/// using `innr::dot` when the `innr` feature is enabled and a plain
/// left-to-right sum otherwise.
fn rotate_all(vectors: &[f32], rotation_t: &[f32], dim: usize, n: usize) -> Vec<f32> {
    let mut rotated = Vec::with_capacity(n * dim);
    for idx in 0..n {
        let src = &vectors[idx * dim..(idx + 1) * dim];
        rotated.extend((0..dim).map(|i| {
            let row = &rotation_t[i * dim..(i + 1) * dim];
            #[cfg(feature = "innr")]
            let value = innr::dot(row, src);
            #[cfg(not(feature = "innr"))]
            let value = (0..dim).fold(0.0f32, |acc, j| acc + row[j] * src[j]);
            value
        }));
    }
    rotated
}

/// Small 64-bit linear congruential generator, used so that the rotation matrix
/// is reproducible from a seed.
struct LcgRng {
    /// Current LCG state; also the most recently returned `u64`.
    state: u64,
    /// `true` while `spare` holds a Gaussian sample that has not been handed out.
    has_spare: bool,
    /// Second sample of the last generated Gaussian pair.
    spare: f64,
}

impl LcgRng {
    /// Create a generator whose initial state is `seed`.
    fn new(seed: u64) -> Self {
        Self {
            state: seed,
            has_spare: false,
            spare: 0.0,
        }
    }

    /// Advance the state by one LCG step (wrapping arithmetic) and return it.
    fn next_u64(&mut self) -> u64 {
        const MULTIPLIER: u64 = 6364136223846793005;
        const INCREMENT: u64 = 1442695040888963407;

        let next = self.state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
        self.state = next;
        next
    }

    /// Uniform sample in `[0, 1)` built from the top 53 bits of the next state.
    fn next_uniform(&mut self) -> f64 {
        const TWO_POW_53: f64 = (1u64 << 53) as f64;

        let top_bits = self.next_u64() >> 11;
        top_bits as f64 / TWO_POW_53
    }

    /// Standard Gaussian sample via the Marsaglia polar method (the rejection-based
    /// variant of Box-Muller). Samples are produced in pairs; the second one is
    /// cached and returned by the following call.
    fn next_gaussian(&mut self) -> f64 {
        // Hand out the cached half of the previous pair first, clearing the flag.
        if std::mem::take(&mut self.has_spare) {
            return self.spare;
        }

        loop {
            let u = self.next_uniform() * 2.0 - 1.0;
            let v = self.next_uniform() * 2.0 - 1.0;
            let s = u * u + v * v;
            // Keep only points strictly inside the unit circle, excluding the origin.
            let inside_unit_circle = s > 0.0 && s < 1.0;
            if inside_unit_circle {
                let factor = (-2.0 * s.ln() / s).sqrt();
                self.spare = v * factor;
                self.has_spare = true;
                return u * factor;
            }
        }
    }
}

/// Estimate the number of signal dimensions via Marchenko-Pastur spectral analysis.
///
/// Computes per-dimension variance of the first `sample_n` vectors, treats them as
/// eigenvalues of the sample covariance matrix (diagonal approximation), and counts
/// how many exceed the MP bulk edge. This is a fast heuristic -- the full covariance
/// would require O(d^2 * n) and an eigendecomposition.
///
/// The result is at least 16 and at most `dim`. When `dim == 0` or fewer than two
/// samples are given, no variance can be computed and `min(16, dim)` is returned
/// without touching `vectors`.
///
/// # Panics
///
/// Panics if `vectors` holds fewer than `sample_n * dim` elements.
#[cfg(feature = "rmt-spectral")]
fn estimate_signal_dimensions(vectors: &[f32], dim: usize, sample_n: usize) -> usize {
    const MIN_SIGNAL_DIMS: usize = 16;

    // The sample variance divides by (n - 1) and the median lookup indexes
    // `dim / 2`; both are undefined for these degenerate inputs.
    if dim == 0 || sample_n < 2 {
        return MIN_SIGNAL_DIMS.min(dim);
    }

    let n = sample_n as f64;

    // Per-dimension mean.
    let mut means = vec![0.0f64; dim];
    for i in 0..sample_n {
        let v = &vectors[i * dim..(i + 1) * dim];
        for (mean, &val) in means.iter_mut().zip(v) {
            *mean += f64::from(val);
        }
    }
    for mean in means.iter_mut() {
        *mean /= n;
    }

    // Per-dimension unbiased sample variance.
    let mut vars = vec![0.0f64; dim];
    for i in 0..sample_n {
        let v = &vectors[i * dim..(i + 1) * dim];
        for ((var, &mean), &val) in vars.iter_mut().zip(&means).zip(v) {
            let diff = f64::from(val) - mean;
            *var += diff * diff;
        }
    }
    for var in vars.iter_mut() {
        *var /= n - 1.0;
    }

    // Use per-dimension variances as proxy eigenvalues.
    // MP ratio gamma = d/n (features / samples).
    let ratio = dim as f64 / n;
    // Estimate the noise variance sigma^2 as the median per-dimension variance.
    let mut sorted_vars = vars.clone();
    sorted_vars.sort_unstable_by(|a, b| a.total_cmp(b));
    let sigma_sq = sorted_vars[dim / 2]; // median

    // NOTE(review): the meaning of the trailing `1.0` argument is defined by
    // `count_mp_outliers` and is not visible from this file.
    let outliers = crate::spectral::count_mp_outliers(&vars, ratio, sigma_sq, 1.0);

    // Signal dimensions = outliers; clamp to reasonable range.
    outliers.max(MIN_SIGNAL_DIMS).min(dim)
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// The generated matrix must satisfy Q^T * Q ~= I.
    #[test]
    fn rotation_is_orthogonal() {
        let dim = 32;
        let q = generate_orthogonal_rotation(dim, 42);

        // Column i dotted with column j is entry (i, j) of Q^T * Q.
        let column_dot = |i: usize, j: usize| -> f32 {
            (0..dim).fold(0.0f32, |acc, k| acc + q[k * dim + i] * q[k * dim + j])
        };

        for i in 0..dim {
            for j in 0..dim {
                let dot = column_dot(i, j);
                let expected = if i == j { 1.0 } else { 0.0 };
                assert!(
                    (dot - expected).abs() < 1e-4,
                    "Q^T*Q[{i},{j}] = {dot}, expected {expected}"
                );
            }
        }
    }

    /// Rotating two vectors must leave their squared L2 distance unchanged.
    #[test]
    fn rotation_preserves_distance() {
        let dim = 64;
        let params = ADSamplingParams {
            seed: 123,
            ..Default::default()
        };
        let rotation_t =
            transpose_square(&generate_orthogonal_rotation(dim, params.seed), dim);

        let a: Vec<f32> = (0..dim).map(|i| (i as f32) * 0.1).collect();
        let b: Vec<f32> = (0..dim).map(|i| (i as f32) * 0.1 + 1.0).collect();

        let squared_distance = |x: &[f32], y: &[f32]| -> f32 {
            x.iter().zip(y).map(|(p, q)| (p - q) * (p - q)).sum()
        };

        let orig_dist = squared_distance(&a, &b);
        let rot_dist = squared_distance(
            &rotate_vector(&a, &rotation_t, dim),
            &rotate_vector(&b, &rotation_t, dim),
        );

        assert!(
            (orig_dist - rot_dist).abs() < 1e-2,
            "orig={orig_dist}, rotated={rot_dist}"
        );
    }

    /// A candidate far inside the threshold must survive every checkpoint and come
    /// back with its exact L2 distance.
    #[test]
    fn dist_comp_accepts_near_neighbor() {
        let dim = 64;
        let params = ADSamplingParams::default();

        // Two nearby vectors: every coordinate differs by 0.01, so the exact
        // L2 distance is sqrt(64 * 0.01^2) = 0.08.
        let a: Vec<f32> = vec![1.0; dim];
        let b: Vec<f32> = vec![1.01; dim];
        let vectors: Vec<f32> = [a.clone(), b].concat();

        let state = ADSamplingState::new(&vectors, dim, params);
        let rq = state.rotate_query(&a);

        // Use a finite threshold: with `f32::INFINITY` rejection is disabled and the
        // test could never fail. 1.0 is generous compared to the true distance.
        let result = state.dist_comp(&rq, 1, 1.0);
        let dist = result.expect("near neighbor should be accepted");
        assert!(
            (dist - 0.08).abs() < 1e-3,
            "dist={dist}, expected ~0.08"
        );
    }

    /// A candidate vastly farther than the threshold must be rejected.
    #[test]
    fn dist_comp_rejects_far_candidate() {
        let dim = 128;

        // Vector 0 is the origin, vector 1 is 100.0 in every coordinate.
        let origin = vec![0.0f32; dim];
        let mut vectors = origin.clone();
        vectors.extend(std::iter::repeat(100.0f32).take(dim));

        let state = ADSamplingState::new(&vectors, dim, ADSamplingParams::default());
        let rq = state.rotate_query(&origin);

        // Tight threshold: anything beyond 0.01 from the query should be dropped.
        let outcome = state.dist_comp(&rq, 1, 0.01);
        assert!(outcome.is_none(), "far candidate should be rejected");
    }

    /// With rejection disabled, `dist_comp` must return the exact L2 distance.
    #[test]
    fn exact_distance_when_no_early_exit() {
        // dim == delta_d, so all dimensions are covered by a single batch.
        let dim = 32;
        let params = ADSamplingParams {
            delta_d: 32,
            ..Default::default()
        };

        let a: Vec<f32> = (0..dim).map(|i| i as f32).collect();
        let b: Vec<f32> = (0..dim).map(|i| (i as f32) + 0.5).collect();
        let mut vectors = a.clone();
        vectors.extend_from_slice(&b);

        let state = ADSamplingState::new(&vectors, dim, params);
        let rq = state.rotate_query(&a);

        // An infinite threshold never triggers the rejection test.
        let dist = state.dist_comp(&rq, 1, f32::INFINITY).unwrap();

        // The rotation preserves L2, so the rotated-space distance should match
        // the distance computed on the original vectors.
        let expected = a
            .iter()
            .zip(&b)
            .map(|(x, y)| (x - y) * (x - y))
            .sum::<f32>()
            .sqrt();
        assert!(
            (dist - expected).abs() < 1e-1,
            "dist={dist}, expected={expected}"
        );
    }

    /// End-to-end check: querying an HNSW index through ADSampling with one of the
    /// indexed vectors must return a first hit at distance ~0.
    #[cfg(feature = "hnsw")]
    #[test]
    fn search_hnsw_with_adsampling() {
        use crate::hnsw::{HNSWIndex, HNSWParams};

        let dim = 64;
        let n = 200;
        let mut rng = LcgRng::new(99);
        let vectors: Vec<f32> = (0..n * dim).map(|_| rng.next_uniform() as f32).collect();

        // Scale every vector to unit L2 norm.
        // NOTE(review): presumably because the default HNSW metric is cosine --
        // confirm against `HNSWParams::default()`.
        let mut normalized = vectors.clone();
        for i in 0..n {
            let slice = &mut normalized[i * dim..(i + 1) * dim];
            let norm: f32 = slice.iter().map(|x| x * x).sum::<f32>().sqrt();
            if norm > 0.0 {
                for x in slice.iter_mut() {
                    *x /= norm;
                }
            }
        }

        let params = HNSWParams {
            m: 16,
            ef_construction: 100,
            seed: Some(42),
            ..Default::default()
        };
        let mut index = HNSWIndex::with_params(dim, params).unwrap();
        let ids: Vec<u32> = (0..n as u32).collect();
        index.add_batch(&ids, &normalized).unwrap();
        // The return value of `build()` is deliberately discarded here.
        let _ = index.build();

        // Must use from_hnsw after build() because build reorders vectors.
        let state = ADSamplingState::from_hnsw(&index, ADSamplingParams::default());

        // Query with the first inserted vector (k = 10, ef = 50).
        let query = &normalized[0..dim];
        let results = state.search_hnsw(&index, query, 10, 50).unwrap();

        assert!(!results.is_empty());
        // The query's own vector should be in the top results (distance ~ 0).
        assert!(
            results[0].1 < 0.01,
            "self-match distance should be ~0, got {}",
            results[0].1
        );
    }

    /// Cross-checks ADSampling against plain HNSW search on unnormalized L2 data.
    ///
    /// Four searches are run with the same query (k = 10, ef = 100): the index's own
    /// `search`, `search_with_distance` with an explicit L2 callback (twice, to check
    /// determinism), `search_with_distance` with rotated exact distances, and the
    /// full ADSampling path. The 10th-nearest distances must agree within 1%.
    #[cfg(feature = "hnsw")]
    #[test]
    fn search_hnsw_l2_unnormalized() {
        use crate::hnsw::{HNSWIndex, HNSWParams};

        let dim = 128;
        let n = 500;
        let mut rng = LcgRng::new(77);
        // Uniform values scaled to [0, 255).
        let vectors: Vec<f32> = (0..n * dim)
            .map(|_| rng.next_uniform() as f32 * 255.0)
            .collect();

        let params = HNSWParams {
            m: 16,
            ef_construction: 100,
            metric: crate::DistanceMetric::L2,
            seed: Some(42),
            ..Default::default()
        };
        let mut index = HNSWIndex::with_params(dim, params).unwrap();
        let ids: Vec<u32> = (0..n as u32).collect();
        index.add_batch(&ids, &vectors).unwrap();
        // The return value of `build()` is deliberately discarded here.
        let _ = index.build();

        // Verify HNSW alone works
        let hnsw_results = index.search(&vectors[0..dim], 10, 100).unwrap();
        assert!(!hnsw_results.is_empty(), "HNSW should return results");

        // Build ADSampling from the HNSW's reordered vectors (critical!)
        let state = ADSamplingState::from_hnsw(
            &index,
            ADSamplingParams {
                epsilon0: 2.1,
                ..Default::default()
            },
        );

        let results = state
            .search_hnsw(&index, &vectors[0..dim], 10, 100)
            .unwrap();

        assert!(!results.is_empty(), "ADSampling should return results");

        // Direct comparison: two calls to search_with_distance with the SAME L2 function.
        // If results differ, there's non-determinism in search_with_distance itself.
        let query = &vectors[0..dim];

        // Exact L2 between the query and the index's stored vector for node `nid`.
        let l2_fn = |q: &[f32], nid: u32| -> f32 {
            let v = &index.vectors[nid as usize * dim..(nid as usize + 1) * dim];
            crate::distance::l2_distance(q, v)
        };

        let run1 = index.search_with_distance(query, 10, 100, &l2_fn).unwrap();
        let run2 = index.search_with_distance(query, 10, 100, &l2_fn).unwrap();

        // Both runs should return identical results (deterministic)
        assert_eq!(
            run1.iter().map(|r| r.0).collect::<Vec<_>>(),
            run2.iter().map(|r| r.0).collect::<Vec<_>>(),
            "Two identical search_with_distance calls return different results!\n\
             Run1: {:?}\nRun2: {:?}",
            &run1[..5.min(run1.len())],
            &run2[..5.min(run2.len())]
        );

        // Now compare L2 custom vs standard search
        let hnsw_ids: std::collections::HashSet<u32> = hnsw_results.iter().map(|r| r.0).collect();
        let custom_ids: std::collections::HashSet<u32> = run1.iter().map(|r| r.0).collect();
        let overlap = hnsw_ids.intersection(&custom_ids).count();
        // At least 8 of the 10 IDs must coincide.
        assert!(
            overlap >= 8,
            "search() vs search_with_distance(L2) should agree, got {overlap}/10.\n\
             search():     {:?}\nsearch_w_d(): {:?}",
            &hnsw_results[..5.min(hnsw_results.len())],
            &run1[..5.min(run1.len())]
        );

        // Test: rotated L2 WITHOUT threshold tracking (plain exact distance)
        let rotated_query = state.rotate_query(&vectors[0..dim]);
        let plain_rotated = index
            .search_with_distance(query, 10, 100, &|_q: &[f32], nid: u32| {
                state
                    .dist_comp(&rotated_query, nid, f32::INFINITY)
                    .unwrap_or(f32::INFINITY)
            })
            .unwrap();

        // ADSampling returns different node IDs but equivalent distances due to
        // tie-breaking: floating-point differences in rotated vs original distance
        // cause beam search to explore neighbors in a different order. Verify the
        // k-th distances match within tolerance rather than requiring identical IDs.
        let hnsw_10th = hnsw_results.last().unwrap().1;
        let plain_10th = plain_rotated.last().unwrap().1;
        let ads_10th = results.last().unwrap().1;
        assert!(
            (hnsw_10th - plain_10th).abs() < hnsw_10th * 0.01,
            "Rotated L2 10th-dist should match HNSW within 1%: {hnsw_10th:.2} vs {plain_10th:.2}"
        );
        assert!(
            (hnsw_10th - ads_10th).abs() < hnsw_10th * 0.01,
            "ADSampling 10th-dist should match HNSW within 1%: {hnsw_10th:.2} vs {ads_10th:.2}"
        );
    }

    /// `new_auto` must pick a `delta_d` inside [16, 64] that is a multiple of 16.
    #[cfg(feature = "rmt-spectral")]
    #[test]
    fn auto_tuning_selects_reasonable_delta_d() {
        let dim = 128;
        let n = 500;
        let mut rng = LcgRng::new(77);

        // The first 30 dimensions carry signal (values scaled by 10), the rest are
        // low-amplitude noise (values scaled by 0.1). Samples are drawn row by row.
        let vectors: Vec<f32> = (0..n * dim)
            .map(|flat| {
                let base = rng.next_uniform() as f32;
                if flat % dim < 30 {
                    base * 10.0
                } else {
                    base * 0.1
                }
            })
            .collect();

        let state = ADSamplingState::new_auto(&vectors, dim, ADSamplingParams::default());
        let delta_d = state.params.delta_d;

        // delta_d should be in the SIMD-friendly range, influenced by signal structure.
        assert!(
            delta_d >= 16 && delta_d <= 64,
            "delta_d={} not in [16, 64]",
            delta_d
        );
        // Should be a multiple of 16.
        assert_eq!(delta_d % 16, 0, "delta_d={} not aligned to 16", delta_d);
    }
}