Skip to main content

nodedb_vector/planner/
query_options.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! `VectorQueryOptions` — all knobs the vector planner exposes to callers.
4//!
5//! Mirrors the SQL syntax:
6//! ```sql
7//! SELECT … ORDER BY embedding <=> $q LIMIT 10
8//! WITH (quantization='rabitq', rerank=100, query_dim=512, meta_tokens=8)
9//! ```
10
/// Which graph index to use for this query.
///
/// [`IndexType::Hnsw`] is the default variant and is what
/// `IndexType::default()` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[non_exhaustive]
pub enum IndexType {
    /// In-memory HNSW graph.  Default.
    #[default]
    Hnsw,
    /// SSD-resident Vamana graph (DiskANN).  Selected for billion-scale collections.
    Vamana,
}
20
/// Which quantization codec to use during traversal and optionally rerank.
///
/// Derives `Hash` in addition to the comparison traits so a codec can be used
/// directly as a `HashMap` / `HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum QuantizationKind {
    /// 8-bit scalar quantization (~0.97 recall with no rerank).
    Sq8,
    /// Product quantization (~0.92 recall).
    Pq,
    /// 1-bit RaBitQ with `O(1/√D)` error bound (SIGMOD 2024).
    RaBitQ,
    /// Better Binary Quantization — centroid-centered asymmetric 1-bit + rerank.
    Bbq,
    /// Raw sign-bit binary (cheap; ~0.85 recall without rerank).
    Binary,
    /// BitNet 1.58 trit-packed ternary.
    Ternary,
    /// OPQ — learned rotation applied before PQ.
    Opq,
    /// No quantization — FP32 distance in hot path.
    ///
    /// This is a codec choice, not an absent value: it is unrelated to
    /// `Option::None`, so always spell it `QuantizationKind::None`.
    None,
}
42
43/// All knobs the vector planner exposes.
44///
45/// This struct is constructed by `nodedb-sql`'s engine rules from query hints
46/// and passed through to `nodedb-vector`'s search entry-points.  Every field
47/// has a sensible default so callers can build with `..Default::default()`.
48#[derive(Debug, Clone)]
49pub struct VectorQueryOptions {
50    /// Graph index type to use.
51    pub index_type: IndexType,
52
53    /// Quantization codec for in-graph distance computation.
54    pub quantization: QuantizationKind,
55
56    /// Matryoshka coarse dimension for adaptive-dim querying.
57    ///
58    /// When `Some(d)`, the first `d` dimensions are used for coarse-pass
59    /// traversal; a full-dimension rerank follows over top-`ef_search`
60    /// candidates.  `None` means use the full embedding dimension.
61    pub query_dim: Option<u32>,
62
63    /// BBQ / RaBitQ rerank oversample multiplier.
64    ///
65    /// The actual rerank candidate count is `oversample * ef_search`.
66    /// Ignored when `quantization == None`.
67    pub oversample: u8,
68
69    /// MetaEmbed Meta Token budget for budgeted MaxSim.
70    ///
71    /// `None` uses the default token count for the collection.
72    pub meta_token_budget: Option<u8>,
73
74    /// Beam width for HNSW / Vamana search.  Must be >= `k`.
75    pub ef_search: usize,
76
77    /// Number of nearest neighbours to return.
78    pub k: usize,
79
80    /// Target recall in `[0.0, 1.0]`.  The planner uses this to pick the
81    /// quantization tier and oversample ratio when the caller does not
82    /// specify them explicitly.
83    pub target_recall: f32,
84}
85
86impl Default for VectorQueryOptions {
87    fn default() -> Self {
88        Self {
89            index_type: IndexType::Hnsw,
90            quantization: QuantizationKind::Sq8,
91            query_dim: None,
92            oversample: 3,
93            meta_token_budget: None,
94            ef_search: 64,
95            k: 10,
96            target_recall: 0.95,
97        }
98    }
99}
100
#[cfg(test)]
mod tests {
    use super::*;

    /// The default option set matches the documented baseline and upholds
    /// the `ef_search >= k` invariant.
    #[test]
    fn default_produces_reasonable_values() {
        let opts = VectorQueryOptions::default();
        assert_eq!(opts.index_type, IndexType::Hnsw);
        // Pin the default codec too: changing it silently shifts recall.
        assert_eq!(opts.quantization, QuantizationKind::Sq8);
        assert_eq!(opts.oversample, 3);
        assert!(opts.ef_search >= opts.k, "ef_search must be >= k");
        assert!(opts.target_recall > 0.0 && opts.target_recall <= 1.0);
        assert!(opts.query_dim.is_none());
        assert!(opts.meta_token_budget.is_none());
    }

    /// Every field set explicitly is read back unchanged.
    #[test]
    fn custom_options_roundtrip() {
        let opts = VectorQueryOptions {
            index_type: IndexType::Vamana,
            quantization: QuantizationKind::RaBitQ,
            query_dim: Some(512),
            oversample: 5,
            meta_token_budget: Some(8),
            ef_search: 128,
            k: 20,
            target_recall: 0.99,
        };
        assert_eq!(opts.index_type, IndexType::Vamana);
        assert_eq!(opts.quantization, QuantizationKind::RaBitQ);
        assert_eq!(opts.query_dim, Some(512));
        assert_eq!(opts.oversample, 5);
        assert_eq!(opts.meta_token_budget, Some(8));
        assert_eq!(opts.ef_search, 128);
        assert_eq!(opts.k, 20);
        assert!((opts.target_recall - 0.99).abs() < f32::EPSILON);
    }

    /// The `..Default::default()` construction advertised in the struct docs
    /// overrides only the named fields and leaves the rest at their defaults.
    #[test]
    fn struct_update_overrides_only_named_fields() {
        let base = VectorQueryOptions::default();
        let opts = VectorQueryOptions {
            quantization: QuantizationKind::Bbq,
            query_dim: Some(256),
            ..Default::default()
        };

        // Overridden fields.
        assert_eq!(opts.quantization, QuantizationKind::Bbq);
        assert_eq!(opts.query_dim, Some(256));

        // Everything else is inherited from the default. Fields are compared
        // one by one so the test does not require `PartialEq` on the struct.
        assert_eq!(opts.index_type, base.index_type);
        assert_eq!(opts.oversample, base.oversample);
        assert_eq!(opts.meta_token_budget, base.meta_token_budget);
        assert_eq!(opts.ef_search, base.ef_search);
        assert_eq!(opts.k, base.k);
        assert!((opts.target_recall - base.target_recall).abs() < f32::EPSILON);
    }
}