Skip to main content

iqdb_hnsw/
lib.rs

1//! # iqdb-hnsw
2//!
3//! Hierarchical Navigable Small World approximate nearest-neighbour
4//! index for the iQDB vector-database spine, implementing the
5//! Malkov–Yashunin (2016) algorithm. [`HnswIndex`] builds a multi-layer
6//! proximity graph at insert time and answers top-`k` queries by
7//! beam-search descent through the layers.
8//!
9//! At the default `ef_search = 64`, recall@10 against the exact
10//! `iqdb_flat::FlatIndex` oracle clears `0.95` on real data — SIFT-1M
11//! (dim=128) measures recall@10 = 0.9644 at this default.
12//!
13//! ## Design
14//!
15//! - Storage is `Vec<Arc<[f32]>>` per row plus parallel `Vec`s for
16//!   `VectorId`, `Option<Metadata>`, insertion-sequence number,
17//!   tombstone flag, and top-layer index. The row payload is wrapped
18//!   in `Arc` so the engine shares one allocation between this index
19//!   and its record store.
20//! - Distance math is delegated to
21//!   [`iqdb_distance::compute_batch`] — HNSW never reimplements a
22//!   metric. `DotProduct` is negated at the boundary so
23//!   [`Hit::distance`] is *smaller-is-nearer* across all five metrics.
24//! - The layer-assignment PRNG is a hand-rolled SplitMix64 seeded
25//!   from [`HnswConfig::seed`]; no external `rand` dependency.
26//!   Identical insert order + identical seed produces a byte-
27//!   identical graph and identical query results.
28//! - Per-layer adjacency is bounded by [`HnswConfig::m`] (`2 * m` at
29//!   layer 0). Insert (Alg 1) selects diverse neighbours via the
30//!   Alg 4 heuristic, links bidirectionally, and re-runs the
31//!   heuristic on any neighbour that overflows its cap.
32//! - Search (Alg 5) greedy-descends from the global entry through
33//!   the upper layers with `ef = 1`, then runs SEARCH-LAYER (Alg 2)
34//!   at layer 0 with `ef = max(ef_search, k)`.
35//! - Delete is tombstone-only. Tombstoned nodes stay in graph
36//!   traversal for connectivity but are never returned as `Hit`s.
37//! - Filter handling is post-filter via
38//!   [`iqdb_filter::FilterEvaluator`]. The beam widens by
39//!   [`HnswConfig::filter_widen`] when a filter is present;
40//!   selective filters can still under-return.
41//!
42//! ## Example
43//!
44//! ```
45//! use std::sync::Arc;
46//!
47//! use iqdb_hnsw::{HnswConfig, HnswIndex};
48//! use iqdb_index::{Index, IndexCore};
49//! use iqdb_types::{DistanceMetric, SearchParams, VectorId};
50//!
51//! # fn main() -> iqdb_types::Result<()> {
52//! let mut idx = HnswIndex::new(2, DistanceMetric::Euclidean, HnswConfig::default())?;
53//! idx.insert(VectorId::from(1u64), Arc::<[f32]>::from(&[0.0, 0.0][..]), None)?;
54//! idx.insert(VectorId::from(2u64), Arc::<[f32]>::from(&[3.0, 4.0][..]), None)?;
55//! idx.insert(VectorId::from(3u64), Arc::<[f32]>::from(&[1.0, 0.0][..]), None)?;
56//!
57//! let hits = idx.search(&[0.0, 0.0], &SearchParams::new(2, DistanceMetric::Euclidean))?;
58//! assert_eq!(hits.len(), 2);
59//! assert_eq!(hits[0].id, VectorId::U64(1));
60//! assert_eq!(hits[1].id, VectorId::U64(3));
61//! # Ok(())
62//! # }
63//! ```
64
65#![cfg_attr(docsrs, feature(doc_cfg))]
66#![deny(warnings)]
67#![deny(missing_docs)]
68#![deny(unsafe_op_in_unsafe_fn)]
69#![deny(unused_must_use)]
70#![deny(unused_results)]
71#![deny(clippy::unwrap_used)]
72#![deny(clippy::expect_used)]
73#![deny(clippy::todo)]
74#![deny(clippy::unimplemented)]
75#![deny(clippy::print_stdout)]
76#![deny(clippy::print_stderr)]
77#![deny(clippy::dbg_macro)]
78#![deny(clippy::unreachable)]
79#![deny(clippy::undocumented_unsafe_blocks)]
80#![forbid(unsafe_code)]
81
82mod config;
83mod graph;
84mod index;
85mod insert;
86mod rng;
87mod search;
88mod topk;
89
90pub use crate::config::HnswConfig;
91pub use crate::index::HnswIndex;
92
93// Re-export the `Hit` type that searches return, so callers can drive
94// `HnswIndex` without a second `use` line for the result type.
95pub use iqdb_types::Hit;
96
97/// The version of this crate, taken from `Cargo.toml` at compile time.
98///
99/// # Examples
100///
101/// ```
102/// let version = iqdb_hnsw::VERSION;
103/// assert_eq!(version.split('.').count(), 3);
104/// ```
105pub const VERSION: &str = env!("CARGO_PKG_VERSION");