Skip to main content

leann_core/hnsw/
graph.rs

1use crate::index::DistanceMetric;
2use serde::{Deserialize, Serialize};
3
4/// Configuration parameters for an HNSW graph.
5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct HnswConfig {
7    /// Number of bi-directional links per element (higher = better recall, more memory).
8    pub m: usize,
9    /// Size of the dynamic candidate list during construction.
10    pub ef_construction: usize,
11    /// Size of the dynamic candidate list during search.
12    pub ef_search: usize,
13    /// Distance metric for similarity computation.
14    pub distance_metric: DistanceMetric,
15    /// Whether to use compact CSR storage.
16    pub is_compact: bool,
17    /// Whether the index uses embedding recomputation (pruned storage).
18    pub is_recompute: bool,
19    /// Optional seed for deterministic level assignment. `None` uses a default.
20    #[serde(default)]
21    pub seed: Option<u64>,
22}
23
24impl Default for HnswConfig {
25    fn default() -> Self {
26        Self {
27            m: 32,
28            ef_construction: 200,
29            ef_search: 64,
30            distance_metric: DistanceMetric::Mips,
31            is_compact: true,
32            is_recompute: true,
33            seed: None,
34        }
35    }
36}
37
38/// The HNSW graph structure.
39///
40/// Supports two storage modes:
41/// 1. **Standard (non-compact)**: Flat adjacency lists with offset arrays (FAISS-compatible).
42/// 2. **Compact (CSR)**: Compressed sparse row format with separate level pointers.
43pub struct HnswGraph {
44    /// Number of vectors indexed.
45    pub ntotal: usize,
46    /// Dimensionality of the vectors.
47    pub dimensions: usize,
48    /// Entry point node for search.
49    pub entry_point: i32,
50    /// Maximum level in the graph.
51    pub max_level: i32,
52    /// Level assignment for each node (levels[i] = number of levels for node i).
53    pub levels: Vec<i32>,
54    /// Probability of level assignment.
55    pub assign_probas: Vec<f64>,
56    /// Cumulative number of neighbors per level.
57    pub cum_nneighbor_per_level: Vec<i32>,
58    /// Configuration used during build.
59    pub config: HnswConfig,
60    /// FAISS metric type (1 = INNER_PRODUCT, 0 = L2).
61    pub metric_type: i32,
62    /// Metric argument (for custom metrics).
63    pub metric_arg: f32,
64    /// Graph storage (either standard or compact).
65    pub storage: GraphStorage,
66    /// The storage section data from the FAISS file (IndexFlat data or null).
67    pub vector_storage: VectorStorage,
68}
69
70/// Graph adjacency storage.
71pub enum GraphStorage {
72    /// Standard FAISS format: offsets[ntotal+1] + neighbors[total_links].
73    Standard {
74        offsets: Vec<u64>,
75        neighbors: Vec<i32>,
76    },
77    /// Compact CSR format.
78    Compact {
79        /// Pointers into the neighbors array for each level of each node.
80        level_ptr: Vec<u64>,
81        /// Offsets into level_ptr for each node.
82        node_offsets: Vec<u64>,
83        /// Compact neighbor data (only valid neighbors, no -1 padding).
84        neighbors: Vec<i32>,
85    },
86}
87
88/// Vector storage section from the FAISS index file.
89pub enum VectorStorage {
90    /// No vector storage (pruned/recompute mode). FourCC = "null".
91    Null,
92    /// Raw vector storage bytes with its FourCC identifier.
93    Raw { fourcc: u32, data: Vec<u8> },
94}
95
96// FourCC constants matching the FAISS format.
97pub const FOURCC_HNSW_FLAT: u32 = u32::from_le_bytes(*b"IHNf");
98pub const FOURCC_NULL: u32 = u32::from_le_bytes(*b"null");
99
100impl HnswGraph {
101    /// Get the number of neighbors at a given level using cumulative neighbor counts.
102    #[inline]
103    pub fn neighbors_at_level(&self, level: usize) -> usize {
104        if level == 0 {
105            if self.cum_nneighbor_per_level.is_empty() {
106                return 0;
107            }
108            self.cum_nneighbor_per_level[0] as usize
109        } else if level < self.cum_nneighbor_per_level.len() {
110            (self.cum_nneighbor_per_level[level] - self.cum_nneighbor_per_level[level - 1]) as usize
111        } else {
112            0
113        }
114    }
115
116    /// Get neighbors of a node at a specific level (standard format).
117    #[inline]
118    pub fn get_neighbors(&self, node: usize, level: usize) -> &[i32] {
119        match &self.storage {
120            GraphStorage::Standard { offsets, neighbors } => {
121                let offset = offsets[node] as usize;
122                let cum_begin = if level == 0 {
123                    0
124                } else {
125                    self.cum_nneighbor_per_level[level - 1] as usize
126                };
127                let cum_end = if level < self.cum_nneighbor_per_level.len() {
128                    self.cum_nneighbor_per_level[level] as usize
129                } else {
130                    return &[];
131                };
132                let begin = offset + cum_begin;
133                let end = offset + cum_end;
134                if end <= neighbors.len() {
135                    &neighbors[begin..end]
136                } else {
137                    &[]
138                }
139            }
140            GraphStorage::Compact {
141                level_ptr,
142                node_offsets,
143                neighbors,
144            } => {
145                let base = node_offsets[node] as usize;
146                let ptr_idx = base + level;
147                if ptr_idx + 1 >= level_ptr.len() {
148                    return &[];
149                }
150                let begin = level_ptr[ptr_idx] as usize;
151                let end = level_ptr[ptr_idx + 1] as usize;
152                if end <= neighbors.len() {
153                    &neighbors[begin..end]
154                } else {
155                    &[]
156                }
157            }
158        }
159    }
160
161    /// Check if the graph uses compact storage.
162    pub fn is_compact(&self) -> bool {
163        matches!(self.storage, GraphStorage::Compact { .. })
164    }
165
166    /// Check if vector storage has been pruned.
167    pub fn is_pruned(&self) -> bool {
168        matches!(self.vector_storage, VectorStorage::Null)
169    }
170}
171
172#[cfg(test)]
173mod tests {
174    use super::*;
175
176    #[test]
177    fn test_hnsw_config_default() {
178        let config = HnswConfig::default();
179        assert_eq!(config.m, 32);
180        assert_eq!(config.ef_construction, 200);
181        assert_eq!(config.ef_search, 64);
182        assert_eq!(config.distance_metric, DistanceMetric::Mips);
183    }
184
185    #[test]
186    fn test_fourcc_constants() {
187        assert_eq!(FOURCC_HNSW_FLAT, u32::from_le_bytes(*b"IHNf"));
188        assert_eq!(FOURCC_NULL, u32::from_le_bytes(*b"null"));
189    }
190}