nodedb_vector/planner/query_options.rs
// SPDX-License-Identifier: Apache-2.0

//! `VectorQueryOptions` — all knobs the vector planner exposes to callers.
//!
//! The options mirror the hints accepted by the SQL `WITH (…)` clause:
//! ```sql
//! SELECT … ORDER BY embedding <=> $q LIMIT 10
//! WITH (quantization='rabitq', rerank=100, query_dim=512, meta_tokens=8)
//! ```
10
/// Which graph index to use for this query.
///
/// Implements [`Default`], yielding [`IndexType::Hnsw`], so the documented
/// default variant can be obtained programmatically.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum IndexType {
    /// In-memory HNSW graph. This is the default variant.
    #[default]
    Hnsw,
    /// SSD-resident Vamana graph (DiskANN). Selected for billion-scale collections.
    Vamana,
}
20
/// Quantization codec applied during graph traversal and, optionally, rerank.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum QuantizationKind {
    /// Scalar quantization to 8 bits (~0.97 recall with no rerank).
    Sq8,
    /// Product quantization (~0.92 recall).
    Pq,
    /// RaBitQ: 1-bit codes with an `O(1/√D)` error bound (SIGMOD 2024).
    RaBitQ,
    /// Better Binary Quantization: centroid-centered asymmetric 1-bit codes plus rerank.
    Bbq,
    /// Plain sign-bit binary codes (cheap; ~0.85 recall without rerank).
    Binary,
    /// Trit-packed ternary codes (BitNet 1.58).
    Ternary,
    /// OPQ: a learned rotation applied before PQ.
    Opq,
    /// Quantization disabled; FP32 distances are computed in the hot path.
    None,
}
42
43/// All knobs the vector planner exposes.
44///
45/// This struct is constructed by `nodedb-sql`'s engine rules from query hints
46/// and passed through to `nodedb-vector`'s search entry-points. Every field
47/// has a sensible default so callers can build with `..Default::default()`.
48#[derive(Debug, Clone)]
49pub struct VectorQueryOptions {
50 /// Graph index type to use.
51 pub index_type: IndexType,
52
53 /// Quantization codec for in-graph distance computation.
54 pub quantization: QuantizationKind,
55
56 /// Matryoshka coarse dimension for adaptive-dim querying.
57 ///
58 /// When `Some(d)`, the first `d` dimensions are used for coarse-pass
59 /// traversal; a full-dimension rerank follows over top-`ef_search`
60 /// candidates. `None` means use the full embedding dimension.
61 pub query_dim: Option<u32>,
62
63 /// BBQ / RaBitQ rerank oversample multiplier.
64 ///
65 /// The actual rerank candidate count is `oversample * ef_search`.
66 /// Ignored when `quantization == None`.
67 pub oversample: u8,
68
69 /// MetaEmbed Meta Token budget for budgeted MaxSim.
70 ///
71 /// `None` uses the default token count for the collection.
72 pub meta_token_budget: Option<u8>,
73
74 /// Beam width for HNSW / Vamana search. Must be >= `k`.
75 pub ef_search: usize,
76
77 /// Number of nearest neighbours to return.
78 pub k: usize,
79
80 /// Target recall in `[0.0, 1.0]`. The planner uses this to pick the
81 /// quantization tier and oversample ratio when the caller does not
82 /// specify them explicitly.
83 pub target_recall: f32,
84}
85
86impl Default for VectorQueryOptions {
87 fn default() -> Self {
88 Self {
89 index_type: IndexType::Hnsw,
90 quantization: QuantizationKind::Sq8,
91 query_dim: None,
92 oversample: 3,
93 meta_token_budget: None,
94 ef_search: 64,
95 k: 10,
96 target_recall: 0.95,
97 }
98 }
99}
100
#[cfg(test)]
mod tests {
    use super::*;

    /// The defaults select HNSW, keep `ef_search >= k`, use a recall target
    /// in `(0, 1]`, and leave the optional knobs unset.
    #[test]
    fn default_produces_reasonable_values() {
        let VectorQueryOptions {
            index_type,
            query_dim,
            oversample,
            meta_token_budget,
            ef_search,
            k,
            target_recall,
            ..
        } = VectorQueryOptions::default();

        assert_eq!(index_type, IndexType::Hnsw);
        assert_eq!(oversample, 3);
        assert!(ef_search >= k, "ef_search must be >= k");
        assert!(target_recall > 0.0 && target_recall <= 1.0);
        assert!(query_dim.is_none());
        assert!(meta_token_budget.is_none());
    }

    /// Every explicitly assigned field reads back with the value it was given.
    #[test]
    fn custom_options_roundtrip() {
        let custom = VectorQueryOptions {
            index_type: IndexType::Vamana,
            quantization: QuantizationKind::RaBitQ,
            query_dim: Some(512),
            oversample: 5,
            meta_token_budget: Some(8),
            ef_search: 128,
            k: 20,
            target_recall: 0.99,
        };

        let VectorQueryOptions {
            index_type,
            quantization,
            query_dim,
            oversample,
            meta_token_budget,
            ef_search,
            k,
            target_recall,
        } = custom;

        assert_eq!(index_type, IndexType::Vamana);
        assert_eq!(quantization, QuantizationKind::RaBitQ);
        assert_eq!(query_dim, Some(512));
        assert_eq!(oversample, 5);
        assert_eq!(meta_token_budget, Some(8));
        assert_eq!(ef_search, 128);
        assert_eq!(k, 20);
        // Floats are compared with a tolerance rather than exact equality.
        assert!((target_recall - 0.99).abs() < f32::EPSILON);
    }
}