lance_index/vector/
hnsw.rs1use arrow_schema::{DataType, Field};
10use deepsize::DeepSizeOf;
11use itertools::Itertools;
12use serde::{Deserialize, Serialize};
13
14use self::builder::HnswBuildParams;
15use super::graph::OrderedNode;
16use super::storage::VectorStore;
17
18pub mod builder;
19pub mod index;
20
21pub use builder::HNSW;
22pub use index::HNSWIndex;
23
24const HNSW_TYPE: &str = "HNSW";
25const VECTOR_ID_COL: &str = "__vector_id";
26const POINTER_COL: &str = "__pointer";
27
28use std::sync::LazyLock;
29
30pub static POINTER_FIELD: LazyLock<Field> =
33 LazyLock::new(|| Field::new(POINTER_COL, DataType::UInt32, true));
34
35pub static VECTOR_ID_FIELD: LazyLock<Field> =
37 LazyLock::new(|| Field::new(VECTOR_ID_COL, DataType::UInt32, true));
38
39#[derive(Debug, Clone, Serialize, Deserialize, DeepSizeOf)]
40pub struct HnswMetadata {
41 pub entry_point: u32,
42 pub params: HnswBuildParams,
43 pub level_offsets: Vec<usize>,
44}
45
46impl Default for HnswMetadata {
47 fn default() -> Self {
48 let params = HnswBuildParams::default();
49 let level_offsets = vec![0; params.max_level as usize];
50 Self {
51 entry_point: 0,
52 params,
53 level_offsets,
54 }
55 }
56}
57
58fn select_neighbors_heuristic(
63 storage: &impl VectorStore,
64 candidates: &[OrderedNode],
65 k: usize,
66) -> Vec<OrderedNode> {
67 if candidates.len() <= k {
68 return candidates.iter().cloned().collect_vec();
69 }
70 let mut candidates = candidates.to_vec();
71 candidates.sort_unstable();
72
73 let mut results: Vec<OrderedNode> = Vec::with_capacity(k);
74 for u in candidates.iter() {
75 if results.len() >= k {
76 break;
77 }
78
79 if results.is_empty() || storage.prefers_candidate(u, &results) {
80 results.push(u.clone());
81 }
82 }
83 results
84}