Skip to main content

nodedb_types/config/tuning/
engines.rs

//! Per-engine tuning: Vector, Sparse, Graph, Timeseries, KV.
2
3use serde::{Deserialize, Serialize};
4
5/// Vector engine tuning (HNSW, PQ, IVF).
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct VectorTuning {
8    #[serde(default = "default_flat_index_threshold")]
9    pub flat_index_threshold: usize,
10    #[serde(default = "default_seal_threshold")]
11    pub seal_threshold: usize,
12    #[serde(default = "default_pq_m")]
13    pub default_pq_m: usize,
14    #[serde(default = "default_ivf_cells")]
15    pub default_ivf_cells: usize,
16    #[serde(default = "default_ivf_nprobe")]
17    pub default_ivf_nprobe: usize,
18}
19
20impl Default for VectorTuning {
21    fn default() -> Self {
22        Self {
23            flat_index_threshold: default_flat_index_threshold(),
24            seal_threshold: default_seal_threshold(),
25            default_pq_m: default_pq_m(),
26            default_ivf_cells: default_ivf_cells(),
27            default_ivf_nprobe: default_ivf_nprobe(),
28        }
29    }
30}
31
/// Default for `VectorTuning::flat_index_threshold`: 10 000.
fn default_flat_index_threshold() -> usize {
    const FLAT_INDEX_THRESHOLD: usize = 10_000;
    FLAT_INDEX_THRESHOLD
}
/// Default for `VectorTuning::seal_threshold`: 65 536 (2^16).
fn default_seal_threshold() -> usize {
    const SEAL_THRESHOLD: usize = 65_536;
    SEAL_THRESHOLD
}
/// Default for `VectorTuning::default_pq_m`: 8.
fn default_pq_m() -> usize {
    const PQ_M: usize = 8;
    PQ_M
}
/// Default for `VectorTuning::default_ivf_cells`: 256.
fn default_ivf_cells() -> usize {
    const IVF_CELLS: usize = 256;
    IVF_CELLS
}
/// Default for `VectorTuning::default_ivf_nprobe`: 16.
fn default_ivf_nprobe() -> usize {
    const IVF_NPROBE: usize = 16;
    IVF_NPROBE
}
47
48/// Sparse/metadata engine tuning (BM25, GSI, HyperLogLog).
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct SparseTuning {
51    #[serde(default = "default_bm25_k1")]
52    pub bm25_k1: f32,
53    #[serde(default = "default_bm25_b")]
54    pub bm25_b: f32,
55    #[serde(default = "default_max_gsis_per_collection")]
56    pub max_gsis_per_collection: usize,
57    #[serde(default = "default_hll_m")]
58    pub hll_registers: usize,
59    #[serde(default = "default_hll_p")]
60    pub hll_precision: u32,
61}
62
63impl Default for SparseTuning {
64    fn default() -> Self {
65        Self {
66            bm25_k1: default_bm25_k1(),
67            bm25_b: default_bm25_b(),
68            max_gsis_per_collection: default_max_gsis_per_collection(),
69            hll_registers: default_hll_m(),
70            hll_precision: default_hll_p(),
71        }
72    }
73}
74
/// Default for `SparseTuning::bm25_k1`: 1.2.
fn default_bm25_k1() -> f32 {
    const BM25_K1: f32 = 1.2;
    BM25_K1
}
/// Default for `SparseTuning::bm25_b`: 0.75.
fn default_bm25_b() -> f32 {
    const BM25_B: f32 = 0.75;
    BM25_B
}
/// Default for `SparseTuning::max_gsis_per_collection`: 4.
fn default_max_gsis_per_collection() -> usize {
    const MAX_GSIS: usize = 4;
    MAX_GSIS
}
/// Default for `SparseTuning::hll_registers`: 256.
fn default_hll_m() -> usize {
    const HLL_REGISTERS: usize = 256;
    HLL_REGISTERS
}
/// Default for `SparseTuning::hll_precision`: 8.
fn default_hll_p() -> u32 {
    const HLL_PRECISION: u32 = 8;
    HLL_PRECISION
}
90
/// Default upper bound on the number of nodes a BFS traversal may visit.
///
/// The cap keeps fan-out from supernodes from consuming unbounded memory.
pub const DEFAULT_MAX_VISITED: usize = 100_000;
94
/// Default limit on how deep a BFS traversal may go.
pub const DEFAULT_MAX_DEPTH: usize = 10;
97
98/// Graph engine tuning (traversal limits, LCC algorithm).
99#[derive(Debug, Clone, Serialize, Deserialize)]
100pub struct GraphTuning {
101    #[serde(default = "default_max_visited")]
102    pub max_visited: usize,
103    #[serde(default = "default_max_depth")]
104    pub max_depth: usize,
105    #[serde(default = "default_lcc_high_degree_threshold")]
106    pub lcc_high_degree_threshold: usize,
107    #[serde(default = "default_lcc_sample_pairs")]
108    pub lcc_sample_pairs: usize,
109}
110
111impl Default for GraphTuning {
112    fn default() -> Self {
113        Self {
114            max_visited: default_max_visited(),
115            max_depth: default_max_depth(),
116            lcc_high_degree_threshold: default_lcc_high_degree_threshold(),
117            lcc_sample_pairs: default_lcc_sample_pairs(),
118        }
119    }
120}
121
122fn default_max_visited() -> usize {
123    DEFAULT_MAX_VISITED
124}
125fn default_max_depth() -> usize {
126    DEFAULT_MAX_DEPTH
127}
/// Default for `GraphTuning::lcc_high_degree_threshold`: 2 000.
fn default_lcc_high_degree_threshold() -> usize {
    const LCC_HIGH_DEGREE_THRESHOLD: usize = 2_000;
    LCC_HIGH_DEGREE_THRESHOLD
}
/// Default for `GraphTuning::lcc_sample_pairs`: 10 000.
fn default_lcc_sample_pairs() -> usize {
    const LCC_SAMPLE_PAIRS: usize = 10_000;
    LCC_SAMPLE_PAIRS
}
134
135/// Timeseries engine tuning (memtable budgets, block sizes).
136#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct TimeseriesToning {
138    #[serde(default = "default_memtable_budget_bytes")]
139    pub memtable_budget_bytes: usize,
140    #[serde(default = "default_total_budget_bytes")]
141    pub total_budget_bytes: usize,
142    #[serde(default = "default_ts_block_size")]
143    pub block_size: usize,
144}
145
146impl Default for TimeseriesToning {
147    fn default() -> Self {
148        Self {
149            memtable_budget_bytes: default_memtable_budget_bytes(),
150            total_budget_bytes: default_total_budget_bytes(),
151            block_size: default_ts_block_size(),
152        }
153    }
154}
155
/// Default memtable budget for the timeseries engine: 64 MiB, in bytes.
fn default_memtable_budget_bytes() -> usize {
    const MIB: usize = 1024 * 1024;
    64 * MIB
}
/// Default total budget for the timeseries engine: 100 MiB, in bytes.
fn default_total_budget_bytes() -> usize {
    const MIB: usize = 1024 * 1024;
    100 * MIB
}
/// Default timeseries block size: 1024.
fn default_ts_block_size() -> usize {
    const TS_BLOCK_SIZE: usize = 1024;
    TS_BLOCK_SIZE
}
165
166/// KV engine tuning (hash table, expiry wheel, slab allocator).
167///
168/// Controls the per-core hash table parameters, incremental rehash behavior,
169/// expiry wheel tick rate, and the per-tick reap budget that prevents reactor
170/// stalls during mass-expiry events.
171#[derive(Debug, Clone, Serialize, Deserialize)]
172pub struct KvTuning {
173    /// Default initial hash table capacity per collection (number of slots).
174    /// Should be a power of two. Larger values reduce early rehash churn for
175    /// collections that will grow quickly. Default: 16384.
176    #[serde(default = "default_kv_capacity")]
177    pub default_capacity: usize,
178
179    /// Hash table load factor threshold that triggers incremental rehash.
180    /// When `entries / capacity > rehash_load_factor`, the table begins
181    /// doubling. Range: 0.5–0.9. Default: 0.75 (standard Robin Hood threshold).
182    #[serde(default = "default_kv_rehash_load_factor")]
183    pub rehash_load_factor: f32,
184
185    /// Number of entries rehashed per PUT during incremental rehash.
186    /// Higher values complete rehash faster but add per-PUT latency.
187    /// Default: 8.
188    #[serde(default = "default_kv_rehash_batch_size")]
189    pub rehash_batch_size: usize,
190
191    /// Default inline value threshold in bytes. Values at or below this size
192    /// are stored directly in the hash entry (no pointer chase). Larger values
193    /// overflow to slab-allocated Binary Tuples. Default: 64.
194    #[serde(default = "default_kv_inline_threshold")]
195    pub default_inline_threshold: usize,
196
197    /// Maximum expirations processed per reactor tick (event loop iteration).
198    /// Prevents mass-expiry events (e.g., 10M keys with identical TTL) from
199    /// stalling the TPC core. Expired-but-not-yet-reaped keys are invisible
200    /// to GET (lazy fallback). Default: 1024.
201    #[serde(default = "default_kv_expiry_reap_budget")]
202    pub expiry_reap_budget: usize,
203
204    /// Expiry wheel tick interval in milliseconds. Determines the granularity
205    /// of TTL expiration. Lower = more precise but more CPU overhead.
206    /// Default: 1000 (1 second).
207    #[serde(default = "default_kv_expiry_tick_ms")]
208    pub expiry_tick_ms: u64,
209}
210
211impl Default for KvTuning {
212    fn default() -> Self {
213        Self {
214            default_capacity: default_kv_capacity(),
215            rehash_load_factor: default_kv_rehash_load_factor(),
216            rehash_batch_size: default_kv_rehash_batch_size(),
217            default_inline_threshold: default_kv_inline_threshold(),
218            expiry_reap_budget: default_kv_expiry_reap_budget(),
219            expiry_tick_ms: default_kv_expiry_tick_ms(),
220        }
221    }
222}
223
/// Default for `KvTuning::default_capacity`: 16 384 slots (2^14).
fn default_kv_capacity() -> usize {
    const KV_CAPACITY: usize = 16_384;
    KV_CAPACITY
}
/// Default for `KvTuning::rehash_load_factor`: 0.75.
fn default_kv_rehash_load_factor() -> f32 {
    const REHASH_LOAD_FACTOR: f32 = 0.75;
    REHASH_LOAD_FACTOR
}
/// Default for `KvTuning::rehash_batch_size`: 8 entries per PUT.
fn default_kv_rehash_batch_size() -> usize {
    const REHASH_BATCH_SIZE: usize = 8;
    REHASH_BATCH_SIZE
}
/// Default for `KvTuning::default_inline_threshold`: 64 bytes.
fn default_kv_inline_threshold() -> usize {
    const INLINE_THRESHOLD: usize = 64;
    INLINE_THRESHOLD
}
/// Default for `KvTuning::expiry_reap_budget`: 1024 expirations per tick.
fn default_kv_expiry_reap_budget() -> usize {
    const EXPIRY_REAP_BUDGET: usize = 1024;
    EXPIRY_REAP_BUDGET
}
/// Default for `KvTuning::expiry_tick_ms`: 1000 ms (1 second).
fn default_kv_expiry_tick_ms() -> u64 {
    const EXPIRY_TICK_MS: u64 = 1000;
    EXPIRY_TICK_MS
}