Skip to main content

nodedb_vector/hnsw/
arena.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Per-invocation scratch arena for HNSW beam-search heaps.
4//!
5//! `BeamSearchArena` holds pre-allocated backing buffers for the three
6//! data structures consumed by every `search_layer` call:
7//!
8//! - `candidates` — min-heap of `(dist, id)` pairs (wrapped in `Reverse`).
9//! - `results`    — max-heap of `(dist, id)` pairs.
10//! - `visited`    — hash set tracking nodes seen during traversal.
11//!
12//! The arena is owned by `HnswIndex` (inside a `RefCell`) and reset at the
13//! start of each search.  Over time the backing `Vec`s grow to the
14//! high-water-mark capacity of the largest search executed, giving amortised
15//! zero-allocation steady state.
16//!
17//! `BeamSearchArena` is intentionally a per-core resource.  It must not be
18//! shared or accessed concurrently — the wrapping `RefCell` in `HnswIndex`
19//! enforces single-borrower access at runtime.
20
21use std::cmp::Reverse;
22use std::collections::HashSet;
23
24use super::graph::Candidate;
25
26/// Scratch arena for a single HNSW beam-search invocation.
27///
28/// Call [`BeamSearchArena::reset`] at the start of each search to clear the
29/// buffers while retaining their allocated capacity.
30pub struct BeamSearchArena {
31    /// Backing storage for the min-heap of candidates to explore.
32    /// Elements are `Reverse<Candidate>` so the `BinaryHeap` gives min-first.
33    pub(crate) candidates: Vec<Reverse<Candidate>>,
34    /// Backing storage for the max-heap of current best results.
35    pub(crate) results: Vec<Candidate>,
36    /// Visited-node scratch set.  Cleared on reset; capacity is retained.
37    pub(crate) visited: HashSet<u32>,
38}
39
40impl BeamSearchArena {
41    /// Allocate a new arena with the given initial capacity for all buffers.
42    ///
43    /// `initial_capacity` should be at least `ef_construction` for the build
44    /// path and `ef` for the search path.  A value of 256 is a reasonable
45    /// default that covers the common `ef ∈ [32, 200]` range without over-
46    /// allocating.
47    pub fn new(initial_capacity: usize) -> Self {
48        Self {
49            // no-governor: hot-path search arena; reused across queries via reset(), instrument cost exceeds benefit
50            candidates: Vec::with_capacity(initial_capacity),
51            // no-governor: hot-path search arena; reused across queries via reset(), instrument cost exceeds benefit
52            results: Vec::with_capacity(initial_capacity),
53            visited: HashSet::with_capacity(initial_capacity * 4),
54        }
55    }
56
57    /// Reset all buffers to empty while retaining their heap allocations.
58    ///
59    /// Must be called at the start of every `search_layer` invocation.
60    #[inline]
61    pub fn reset(&mut self) {
62        self.candidates.clear();
63        self.results.clear();
64        self.visited.clear();
65    }
66}