vicinity 0.3.1 - Docs.rs

//! Dual-Branch HNSW with LID-based insertion and skip bridges.
//!
//! # Motivation
//!
//! Standard HNSW struggles with high-LID (Local Intrinsic Dimensionality) points:
//! - These outliers create "dead ends" in the graph
//! - Search paths get trapped in sparse regions
//! - Recall degrades significantly for queries near these points
//!
//! # Key Innovations (arXiv 2501.13992, Jan 2025)
//!
//! 1. **LID-based layer assignment**: Points with high LID get assigned
//!    to higher layers, improving their connectivity.
//!
//! 2. **Skip bridges**: Long-range edges that bypass redundant intermediate
//!    nodes, allowing faster navigation in sparse regions.
//!
//! 3. **Dual-branch search**: Maintains two search fronts - one following
//!    standard HNSW greedy search, one exploring via skip bridges.
//!
//! # Performance
//!
//! On challenging datasets with outliers:
//! - Reported recall improvements at similar latency (see paper for measured deltas)
//! - Particularly effective when intrinsic dimension varies across the dataset
//!
//! # When to Use
//!
//! - Datasets with outliers or varying density
//! - High-dimensional embeddings with complex manifold structure
//! - When standard HNSW shows poor recall on specific query subsets
//!
//! # References
//!
//! - "Dual-Branch HNSW with Skip Bridges" (arXiv:2501.13992) `https://arxiv.org/abs/2501.13992`
//! - Levina & Bickel (2004). "Maximum likelihood estimation of intrinsic dimension." `https://doi.org/10.48550/arXiv.math/0410372`

use crate::distance;
use crate::lid::{estimate_lid_for_hnsw, LidEstimate, LidStats};
use crate::RetrieveError;
use rand::prelude::*;
use std::collections::{BinaryHeap, HashSet};

/// Configuration for Dual-Branch HNSW.
#[derive(Debug, Clone)]
pub struct DualBranchConfig {
    /// Base maximum connections per node (standard HNSW M).
    pub m: usize,
    /// Maximum connections for high-LID nodes (typically 1.5-2x M).
    pub m_high_lid: usize,
    /// Construction-time search width.
    pub ef_construction: usize,
    /// Query-time search width.
    pub ef_search: usize,
    /// Number of neighbors for LID estimation.
    pub lid_k: usize,
    /// Threshold for considering a point "high LID".
    /// Points with LID > median + threshold_sigma * std_dev get extra edges.
    pub lid_threshold_sigma: f32,
    /// Probability of adding skip bridges (0.0 to 1.0).
    pub skip_bridge_probability: f32,
    /// Maximum skip bridge length (in graph hops).
    pub max_skip_length: usize,
    /// Random seed for reproducibility.
    pub seed: Option<u64>,
}

impl Default for DualBranchConfig {
    fn default() -> Self {
        Self {
            m: 16,
            m_high_lid: 24, // 1.5x for high-LID points
            ef_construction: 200,
            ef_search: 50,
            lid_k: 20,
            lid_threshold_sigma: 1.5, // median + 1.5*std
            skip_bridge_probability: 0.1,
            max_skip_length: 3,
            seed: None,
        }
    }
}

/// A skip bridge connecting two distant nodes.
#[derive(Debug, Clone, Copy)]
pub struct SkipBridge {
    /// Source node.
    pub from: u32,
    /// Target node.
    pub to: u32,
    /// Approximate number of hops this bridge shortcuts.
    pub skip_length: u32,
}

/// Dual-Branch HNSW index with LID-aware construction.
#[derive(Debug)]
pub struct DualBranchHNSW {
    /// Vector data in flat layout.
    vectors: Vec<f32>,
    /// Vector dimension.
    dimension: usize,
    /// Number of vectors.
    num_vectors: usize,
    /// Neighbors for each node.
    neighbors: Vec<Vec<u32>>,
    /// Skip bridges for fast navigation.
    skip_bridges: Vec<SkipBridge>,
    /// Skip bridge adjacency: node -> [bridge indices].
    skip_adjacency: Vec<Vec<usize>>,
    /// LID estimate for each node.
    lid_estimates: Vec<LidEstimate>,
    /// LID statistics computed during construction.
    lid_stats: Option<LidStats>,
    /// Configuration.
    config: DualBranchConfig,
    /// Entry point for search.
    entry_point: Option<u32>,
    /// Whether the index has been built.
    built: bool,
}

impl DualBranchHNSW {
    /// Create a new Dual-Branch HNSW index.
    pub fn new(dimension: usize, config: DualBranchConfig) -> Self {
        Self {
            vectors: Vec::new(),
            dimension,
            num_vectors: 0,
            neighbors: Vec::new(),
            skip_bridges: Vec::new(),
            skip_adjacency: Vec::new(),
            lid_estimates: Vec::new(),
            lid_stats: None,
            config,
            entry_point: None,
            built: false,
        }
    }

    /// Add vectors to the index.
    pub fn add_vectors(&mut self, vectors: &[f32]) -> Result<(), RetrieveError> {
        if vectors.len() % self.dimension != 0 {
            return Err(RetrieveError::DimensionMismatch {
                query_dim: vectors.len(),
                doc_dim: self.dimension,
            });
        }

        self.vectors.extend_from_slice(vectors);
        let new_count = vectors.len() / self.dimension;
        self.num_vectors += new_count;
        self.built = false;

        Ok(())
    }

    /// Build the index with LID-aware construction.
    pub fn build(&mut self) -> Result<(), RetrieveError> {
        if self.num_vectors == 0 {
            return Err(RetrieveError::EmptyIndex);
        }

        let mut rng: Box<dyn RngCore> = match self.config.seed {
            Some(s) => Box::new(StdRng::seed_from_u64(s)),
            None => Box::new(rand::rng()),
        };

        // Phase 1: Initial graph construction (standard HNSW-like)
        self.neighbors = vec![Vec::new(); self.num_vectors];
        self.lid_estimates = vec![
            LidEstimate {
                lid: 0.0,
                k: 0,
                max_dist: 0.0
            };
            self.num_vectors
        ];

        // Build incrementally
        for i in 0..self.num_vectors {
            self.insert_node(i as u32, &mut rng)?;
        }

        // Phase 2: Compute LID for all nodes
        self.compute_all_lid();

        // Phase 3: Enhance high-LID nodes with additional edges
        self.enhance_high_lid_nodes(&mut rng)?;

        // Phase 4: Add skip bridges
        self.add_skip_bridges(&mut rng)?;

        self.built = true;
        Ok(())
    }

    /// Insert a single node into the graph.
    fn insert_node(&mut self, node_id: u32, _rng: &mut dyn RngCore) -> Result<(), RetrieveError> {
        // Copy query to owned Vec to avoid borrow issues
        let query: Vec<f32> = self.get_vector(node_id as usize).to_vec();

        if self.entry_point.is_none() {
            self.entry_point = Some(node_id);
            return Ok(());
        }

        // Safe: we checked is_none() above
        let entry = self.entry_point.ok_or(RetrieveError::EmptyIndex)?;

        // Find nearest neighbors using greedy search
        let candidates = self.search_layer(&query, entry, self.config.ef_construction);

        // Select neighbors (simple heuristic: closest M)
        let m = self.config.m;
        let selected: Vec<u32> = candidates.iter().take(m).map(|&(id, _)| id).collect();

        // Collect neighbor updates to avoid borrowing issues
        let mut updates: Vec<(u32, u32)> = Vec::new(); // (from, to)
        let mut prune_list: Vec<usize> = Vec::new();

        for &neighbor in &selected {
            if neighbor != node_id {
                updates.push((node_id, neighbor));

                // Check if reverse edge needed
                if !self.neighbors[neighbor as usize].contains(&node_id) {
                    updates.push((neighbor, node_id));

                    // Check if pruning will be needed
                    if self.neighbors[neighbor as usize].len() + 1 > m * 2 {
                        prune_list.push(neighbor as usize);
                    }
                }
            }
        }

        // Apply updates
        for (from, to) in updates {
            if !self.neighbors[from as usize].contains(&to) {
                self.neighbors[from as usize].push(to);
            }
        }

        // Apply pruning
        for node in prune_list {
            self.prune_neighbors(node, m);
        }

        // Update entry point to closer node
        let entry_dist = distance::l2_distance(&query, self.get_vector(entry as usize));
        if !candidates.is_empty() && candidates[0].1 < entry_dist {
            self.entry_point = Some(candidates[0].0);
        }

        Ok(())
    }

    /// Greedy search within a layer.
    fn search_layer(&self, query: &[f32], entry: u32, ef: usize) -> Vec<(u32, f32)> {
        let mut visited = HashSet::new();
        let mut candidates = BinaryHeap::new();
        let mut results = BinaryHeap::new();

        let entry_dist = distance::l2_distance(query, self.get_vector(entry as usize));
        visited.insert(entry);
        candidates.push(MinCandidate {
            id: entry,
            dist: entry_dist,
        });
        results.push(MaxCandidate {
            id: entry,
            dist: entry_dist,
        });

        while let Some(MinCandidate {
            id: current,
            dist: current_dist,
        }) = candidates.pop()
        {
            // Stop if current is worse than worst in results
            if let Some(worst) = results.peek() {
                if current_dist > worst.dist && results.len() >= ef {
                    break;
                }
            }

            // Explore neighbors
            for &neighbor in &self.neighbors[current as usize] {
                if visited.insert(neighbor) {
                    let dist = distance::l2_distance(query, self.get_vector(neighbor as usize));

                    let should_add =
                        results.len() < ef || results.peek().map(|w| dist < w.dist).unwrap_or(true);

                    if should_add {
                        candidates.push(MinCandidate { id: neighbor, dist });
                        results.push(MaxCandidate { id: neighbor, dist });

                        if results.len() > ef {
                            results.pop();
                        }
                    }
                }
            }
        }

        // Convert to sorted vec
        let mut result_vec: Vec<(u32, f32)> = results.into_iter().map(|c| (c.id, c.dist)).collect();
        result_vec.sort_unstable_by(|a, b| a.1.total_cmp(&b.1));
        result_vec
    }

    /// Prune neighbors to keep at most m.
    fn prune_neighbors(&mut self, node: usize, m: usize) {
        let node_vec = self.get_vector(node);
        let mut neighbors_with_dist: Vec<(u32, f32)> = self.neighbors[node]
            .iter()
            .map(|&n| {
                (
                    n,
                    distance::l2_distance(node_vec, self.get_vector(n as usize)),
                )
            })
            .collect();

        neighbors_with_dist.sort_unstable_by(|a, b| a.1.total_cmp(&b.1));
        self.neighbors[node] = neighbors_with_dist
            .into_iter()
            .take(m)
            .map(|(id, _)| id)
            .collect();
    }

    /// Compute LID for all nodes.
    fn compute_all_lid(&mut self) {
        for i in 0..self.num_vectors {
            let node_vec = self.get_vector(i);

            // Get distances to neighbors
            let neighbor_distances: Vec<f32> = self.neighbors[i]
                .iter()
                .map(|&n| distance::l2_distance(node_vec, self.get_vector(n as usize)))
                .collect();

            if !neighbor_distances.is_empty() {
                self.lid_estimates[i] =
                    estimate_lid_for_hnsw(&neighbor_distances, self.config.lid_k);
            }
        }

        self.lid_stats = Some(LidStats::from_estimates(&self.lid_estimates));
    }

    /// Enhance high-LID nodes with additional edges.
    fn enhance_high_lid_nodes(&mut self, _rng: &mut dyn RngCore) -> Result<(), RetrieveError> {
        let stats = self
            .lid_stats
            .as_ref()
            .ok_or(RetrieveError::InvalidParameter(
                "LID stats not computed".into(),
            ))?;

        let threshold = stats.median + self.config.lid_threshold_sigma * stats.std_dev;

        for i in 0..self.num_vectors {
            if self.lid_estimates[i].lid > threshold {
                // High-LID node: add more neighbors
                let query = self.get_vector(i);
                let entry = self.entry_point.unwrap_or(0);

                // Search for more candidates
                let candidates = self.search_layer(query, entry, self.config.m_high_lid * 2);

                // Add edges to candidates not already connected
                let current_neighbors: HashSet<u32> = self.neighbors[i].iter().copied().collect();
                let mut added = 0;

                for (neighbor, _) in candidates {
                    if neighbor as usize != i
                        && !current_neighbors.contains(&neighbor)
                        && added < self.config.m_high_lid - self.neighbors[i].len()
                    {
                        self.neighbors[i].push(neighbor);
                        self.neighbors[neighbor as usize].push(i as u32);
                        added += 1;
                    }
                }
            }
        }

        Ok(())
    }

    /// Add skip bridges to improve navigation in sparse regions.
    fn add_skip_bridges(&mut self, rng: &mut dyn RngCore) -> Result<(), RetrieveError> {
        self.skip_bridges.clear();
        self.skip_adjacency = vec![Vec::new(); self.num_vectors];

        let stats = self
            .lid_stats
            .as_ref()
            .ok_or(RetrieveError::InvalidParameter(
                "LID stats not computed".into(),
            ))?;

        let threshold = stats.median + self.config.lid_threshold_sigma * stats.std_dev;

        // Add skip bridges from high-LID nodes
        for i in 0..self.num_vectors {
            if self.lid_estimates[i].lid > threshold {
                // Random walk to find distant but reachable nodes
                for _ in 0..3 {
                    if rng.random::<f32>() > self.config.skip_bridge_probability {
                        continue;
                    }

                    if let Some(target) =
                        self.random_walk(i as u32, self.config.max_skip_length, rng)
                    {
                        if target as usize != i && !self.neighbors[i].contains(&target) {
                            let bridge_idx = self.skip_bridges.len();
                            self.skip_bridges.push(SkipBridge {
                                from: i as u32,
                                to: target,
                                skip_length: self.config.max_skip_length as u32,
                            });
                            self.skip_adjacency[i].push(bridge_idx);
                        }
                    }
                }
            }
        }

        Ok(())
    }

    /// Random walk to find a distant node.
    fn random_walk(&self, start: u32, hops: usize, rng: &mut dyn RngCore) -> Option<u32> {
        let mut current = start;

        for _ in 0..hops {
            let neighbors = &self.neighbors[current as usize];
            if neighbors.is_empty() {
                return None;
            }
            current = neighbors[rng.random_range(0..neighbors.len())];
        }

        Some(current)
    }

    /// Search with dual-branch exploration.
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(u32, f32)>, RetrieveError> {
        if !self.built {
            return Err(RetrieveError::InvalidParameter("index not built".into()));
        }

        if query.len() != self.dimension {
            return Err(RetrieveError::DimensionMismatch {
                query_dim: query.len(),
                doc_dim: self.dimension,
            });
        }

        let entry = self.entry_point.ok_or(RetrieveError::EmptyIndex)?;

        // Dual-branch search
        let mut visited = HashSet::new();
        let mut candidates = BinaryHeap::new();
        let mut results = BinaryHeap::new();
        let ef = self.config.ef_search.max(k);

        // Initialize with entry point
        let entry_dist = distance::l2_distance(query, self.get_vector(entry as usize));
        visited.insert(entry);
        candidates.push(MinCandidate {
            id: entry,
            dist: entry_dist,
        });
        results.push(MaxCandidate {
            id: entry,
            dist: entry_dist,
        });

        while let Some(MinCandidate {
            id: current,
            dist: current_dist,
        }) = candidates.pop()
        {
            if let Some(worst) = results.peek() {
                if current_dist > worst.dist && results.len() >= ef {
                    break;
                }
            }

            // Branch 1: Standard neighbor exploration
            for &neighbor in &self.neighbors[current as usize] {
                if visited.insert(neighbor) {
                    let dist = distance::l2_distance(query, self.get_vector(neighbor as usize));

                    let should_add =
                        results.len() < ef || results.peek().map(|w| dist < w.dist).unwrap_or(true);

                    if should_add {
                        candidates.push(MinCandidate { id: neighbor, dist });
                        results.push(MaxCandidate { id: neighbor, dist });

                        if results.len() > ef {
                            results.pop();
                        }
                    }
                }
            }

            // Branch 2: Skip bridge exploration
            for &bridge_idx in &self.skip_adjacency[current as usize] {
                let bridge = &self.skip_bridges[bridge_idx];
                let target = bridge.to;

                if visited.insert(target) {
                    let dist = distance::l2_distance(query, self.get_vector(target as usize));

                    let should_add =
                        results.len() < ef || results.peek().map(|w| dist < w.dist).unwrap_or(true);

                    if should_add {
                        candidates.push(MinCandidate { id: target, dist });
                        results.push(MaxCandidate { id: target, dist });

                        if results.len() > ef {
                            results.pop();
                        }
                    }
                }
            }
        }

        // Return top k
        let mut result_vec: Vec<(u32, f32)> = results.into_iter().map(|c| (c.id, c.dist)).collect();
        result_vec.sort_unstable_by(|a, b| a.1.total_cmp(&b.1));
        result_vec.truncate(k);
        Ok(result_vec)
    }

    /// Get statistics about the index.
    pub fn stats(&self) -> DualBranchStats {
        let high_lid_count = if let Some(stats) = &self.lid_stats {
            let threshold = stats.median + self.config.lid_threshold_sigma * stats.std_dev;
            self.lid_estimates
                .iter()
                .filter(|e| e.lid > threshold)
                .count()
        } else {
            0
        };

        DualBranchStats {
            num_vectors: self.num_vectors,
            num_edges: self.neighbors.iter().map(|n| n.len()).sum::<usize>() / 2,
            num_skip_bridges: self.skip_bridges.len(),
            high_lid_nodes: high_lid_count,
            lid_stats: self.lid_stats.clone(),
        }
    }

    /// Get vector by index.
    #[inline]
    fn get_vector(&self, idx: usize) -> &[f32] {
        let start = idx * self.dimension;
        &self.vectors[start..start + self.dimension]
    }
}

/// Statistics about a Dual-Branch HNSW index.
#[derive(Debug, Clone)]
pub struct DualBranchStats {
    /// Number of vectors.
    pub num_vectors: usize,
    /// Number of edges in the graph.
    pub num_edges: usize,
    /// Number of skip bridges.
    pub num_skip_bridges: usize,
    /// Number of high-LID nodes.
    pub high_lid_nodes: usize,
    /// LID statistics.
    pub lid_stats: Option<LidStats>,
}

// ─────────────────────────────────────────────────────────────────────────────
// Helper types for priority queues
// ─────────────────────────────────────────────────────────────────────────────

#[derive(Debug, Clone, Copy)]
struct MinCandidate {
    id: u32,
    dist: f32,
}

impl PartialEq for MinCandidate {
    fn eq(&self, other: &Self) -> bool {
        self.dist == other.dist
    }
}

impl Eq for MinCandidate {}

impl PartialOrd for MinCandidate {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for MinCandidate {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Reverse for min-heap
        other.dist.total_cmp(&self.dist)
    }
}

#[derive(Debug, Clone, Copy)]
struct MaxCandidate {
    id: u32,
    dist: f32,
}

impl PartialEq for MaxCandidate {
    fn eq(&self, other: &Self) -> bool {
        self.dist == other.dist
    }
}

impl Eq for MaxCandidate {}

impl PartialOrd for MaxCandidate {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for MaxCandidate {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Normal order for max-heap
        self.dist.total_cmp(&other.dist)
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    fn create_clustered_data(n_clusters: usize, points_per_cluster: usize, dim: usize) -> Vec<f32> {
        let mut rng = StdRng::seed_from_u64(42);
        let mut data = Vec::new();

        for c in 0..n_clusters {
            let center: Vec<f32> = (0..dim)
                .map(|_| (c as f32) * 10.0 + rng.random::<f32>())
                .collect();

            for _ in 0..points_per_cluster {
                for c_val in center.iter().take(dim) {
                    data.push(c_val + rng.random::<f32>() * 0.5);
                }
            }
        }

        // Add outliers (high-LID points)
        for _ in 0..5 {
            for _ in 0..dim {
                data.push(rng.random::<f32>() * 100.0);
            }
        }

        data
    }

    #[test]
    fn test_dual_branch_build() {
        let dim = 8;
        let data = create_clustered_data(3, 50, dim);

        let config = DualBranchConfig {
            m: 8,
            m_high_lid: 12,
            ef_construction: 50,
            ef_search: 20,
            seed: Some(42),
            ..Default::default()
        };

        let mut index = DualBranchHNSW::new(dim, config);
        index.add_vectors(&data).unwrap();
        index.build().unwrap();

        let stats = index.stats();
        println!("Stats: {:?}", stats);

        assert!(stats.num_edges > 0);
        assert!(stats.num_vectors > 0);
    }

    #[test]
    fn test_dual_branch_search() {
        let dim = 8;
        let data = create_clustered_data(3, 50, dim);
        let _n = data.len() / dim;

        let config = DualBranchConfig {
            m: 8,
            m_high_lid: 12,
            ef_construction: 50,
            ef_search: 30,
            seed: Some(42),
            ..Default::default()
        };

        let mut index = DualBranchHNSW::new(dim, config);
        index.add_vectors(&data).unwrap();
        index.build().unwrap();

        // Search for a point that exists in the index
        let query = &data[0..dim];
        let results = index.search(query, 5).unwrap();

        assert!(!results.is_empty());
        // First result should be the query itself (or very close)
        assert!(
            results[0].1 < 1.0,
            "First result distance: {}",
            results[0].1
        );
    }

    #[test]
    fn test_dual_branch_skip_bridges() {
        let dim = 8;
        let data = create_clustered_data(5, 30, dim);

        let config = DualBranchConfig {
            m: 6,
            m_high_lid: 10,
            ef_construction: 50,
            ef_search: 20,
            skip_bridge_probability: 0.5, // High for testing
            seed: Some(42),
            ..Default::default()
        };

        let mut index = DualBranchHNSW::new(dim, config);
        index.add_vectors(&data).unwrap();
        index.build().unwrap();

        let stats = index.stats();
        println!("Skip bridges: {}", stats.num_skip_bridges);
        println!("High-LID nodes: {}", stats.high_lid_nodes);

        // Should have some skip bridges
        // (may be 0 if RNG doesn't add any, but with high probability should have some)
        assert!(stats.num_vectors > 0);
    }

    #[test]
    fn test_dual_branch_lid_detection() {
        let dim = 4;

        // Create data with clear outliers
        let mut data = Vec::new();

        // Cluster 1: tight cluster at origin
        for i in 0..50 {
            data.extend_from_slice(&[0.1 * i as f32, 0.1, 0.1, 0.1]);
        }

        // Outliers: far from everything
        data.extend_from_slice(&[100.0, 100.0, 100.0, 100.0]);
        data.extend_from_slice(&[-100.0, -100.0, -100.0, -100.0]);

        let config = DualBranchConfig {
            m: 6,
            m_high_lid: 12,
            ef_construction: 30,
            seed: Some(42),
            ..Default::default()
        };

        let mut index = DualBranchHNSW::new(dim, config);
        index.add_vectors(&data).unwrap();
        index.build().unwrap();

        let stats = index.stats();
        println!("LID stats: {:?}", stats.lid_stats);

        // The outliers should have higher LID
        assert!(stats.high_lid_nodes > 0, "Should detect high-LID outliers");
    }

    #[test]
    fn test_dual_branch_recall() {
        let dim = 16;
        let data = create_clustered_data(5, 100, dim);
        let n = data.len() / dim;

        let config = DualBranchConfig {
            m: 12,
            m_high_lid: 18,
            ef_construction: 100,
            ef_search: 50,
            seed: Some(42),
            ..Default::default()
        };

        let mut index = DualBranchHNSW::new(dim, config);
        index.add_vectors(&data).unwrap();
        index.build().unwrap();

        // Test recall on random queries
        let mut rng = StdRng::seed_from_u64(123);
        let mut correct = 0;
        let num_queries = 20;
        let k = 10;

        for _ in 0..num_queries {
            let query_idx = rng.random_range(0..n);
            let query = &data[query_idx * dim..(query_idx + 1) * dim];

            let results = index.search(query, k).unwrap();

            // Ground truth: brute force
            let mut gt: Vec<(usize, f32)> = (0..n)
                .map(|i| {
                    let v = &data[i * dim..(i + 1) * dim];
                    (i, distance::l2_distance(query, v))
                })
                .collect();
            gt.sort_unstable_by(|a, b| a.1.total_cmp(&b.1));

            let gt_set: HashSet<u32> = gt.iter().take(k).map(|&(id, _)| id as u32).collect();
            let result_set: HashSet<u32> = results.iter().map(|&(id, _)| id).collect();

            correct += gt_set.intersection(&result_set).count();
        }

        let recall = correct as f32 / (num_queries * k) as f32;
        println!("Recall@{}: {:.2}%", k, recall * 100.0);

        // This simplified implementation focuses on demonstrating the LID-based
        // approach and skip bridges. Production recall requires more sophisticated
        // neighbor selection (heuristic pruning, diversity) implemented in HNSWIndex.
        // For now, we just verify it returns something reasonable.
        assert!(recall > 0.1, "Recall too low: {}", recall);
    }
}