nodedb_vector/hnsw/arena.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! Per-invocation scratch arena for HNSW beam-search heaps.
4//!
5//! `BeamSearchArena` holds pre-allocated backing buffers for the three
6//! data structures consumed by every `search_layer` call:
7//!
8//! - `candidates` — min-heap of `(dist, id)` pairs (wrapped in `Reverse`).
9//! - `results` — max-heap of `(dist, id)` pairs.
10//! - `visited` — hash set tracking nodes seen during traversal.
11//!
12//! The arena is owned by `HnswIndex` (inside a `RefCell`) and reset at the
13//! start of each search. Over time the backing `Vec`s grow to the
14//! high-water-mark capacity of the largest search executed, giving amortised
15//! zero-allocation steady state.
16//!
17//! `BeamSearchArena` is intentionally a per-core resource. It must not be
18//! shared or accessed concurrently — the wrapping `RefCell` in `HnswIndex`
19//! enforces single-borrower access at runtime.
20
21use std::cmp::Reverse;
22use std::collections::HashSet;
23
24use super::graph::Candidate;
25
26/// Scratch arena for a single HNSW beam-search invocation.
27///
28/// Call [`BeamSearchArena::reset`] at the start of each search to clear the
29/// buffers while retaining their allocated capacity.
30pub struct BeamSearchArena {
31 /// Backing storage for the min-heap of candidates to explore.
32 /// Elements are `Reverse<Candidate>` so the `BinaryHeap` gives min-first.
33 pub(crate) candidates: Vec<Reverse<Candidate>>,
34 /// Backing storage for the max-heap of current best results.
35 pub(crate) results: Vec<Candidate>,
36 /// Visited-node scratch set. Cleared on reset; capacity is retained.
37 pub(crate) visited: HashSet<u32>,
38}
39
40impl BeamSearchArena {
41 /// Allocate a new arena with the given initial capacity for all buffers.
42 ///
43 /// `initial_capacity` should be at least `ef_construction` for the build
44 /// path and `ef` for the search path. A value of 256 is a reasonable
45 /// default that covers the common `ef ∈ [32, 200]` range without over-
46 /// allocating.
47 pub fn new(initial_capacity: usize) -> Self {
48 Self {
49 // no-governor: hot-path search arena; reused across queries via reset(), instrument cost exceeds benefit
50 candidates: Vec::with_capacity(initial_capacity),
51 // no-governor: hot-path search arena; reused across queries via reset(), instrument cost exceeds benefit
52 results: Vec::with_capacity(initial_capacity),
53 visited: HashSet::with_capacity(initial_capacity * 4),
54 }
55 }
56
57 /// Reset all buffers to empty while retaining their heap allocations.
58 ///
59 /// Must be called at the start of every `search_layer` invocation.
60 #[inline]
61 pub fn reset(&mut self) {
62 self.candidates.clear();
63 self.results.clear();
64 self.visited.clear();
65 }
66}