Skip to main content

nodedb_types/config/tuning/
engines.rs

1// SPDX-License-Identifier: Apache-2.0
2
//! Per-engine tuning: Vector, Sparse, Graph, Timeseries, KV.
4
5use serde::{Deserialize, Serialize};
6
7/// Vector engine tuning (HNSW, PQ, IVF).
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct VectorTuning {
10    #[serde(default = "default_flat_index_threshold")]
11    pub flat_index_threshold: usize,
12    #[serde(default = "default_seal_threshold")]
13    pub seal_threshold: usize,
14    #[serde(default = "default_pq_m")]
15    pub default_pq_m: usize,
16    #[serde(default = "default_ivf_cells")]
17    pub default_ivf_cells: usize,
18    #[serde(default = "default_ivf_nprobe")]
19    pub default_ivf_nprobe: usize,
20}
21
22impl Default for VectorTuning {
23    fn default() -> Self {
24        Self {
25            flat_index_threshold: default_flat_index_threshold(),
26            seal_threshold: default_seal_threshold(),
27            default_pq_m: default_pq_m(),
28            default_ivf_cells: default_ivf_cells(),
29            default_ivf_nprobe: default_ivf_nprobe(),
30        }
31    }
32}
33
/// Fallback for the `flat_index_threshold` field: 10_000.
fn default_flat_index_threshold() -> usize {
    const FLAT_INDEX_THRESHOLD: usize = 10_000;
    FLAT_INDEX_THRESHOLD
}

/// Fallback for the `seal_threshold` field: 65_536 (2^16).
fn default_seal_threshold() -> usize {
    1 << 16
}

/// Fallback for the `default_pq_m` field: 8.
fn default_pq_m() -> usize {
    const PQ_M: usize = 8;
    PQ_M
}

/// Fallback for the `default_ivf_cells` field: 256 (2^8).
fn default_ivf_cells() -> usize {
    1 << 8
}

/// Fallback for the `default_ivf_nprobe` field: 16.
fn default_ivf_nprobe() -> usize {
    const IVF_NPROBE: usize = 16;
    IVF_NPROBE
}
49
50/// Sparse/metadata engine tuning (BM25, GSI, HyperLogLog).
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct SparseTuning {
53    #[serde(default = "default_bm25_k1")]
54    pub bm25_k1: f32,
55    #[serde(default = "default_bm25_b")]
56    pub bm25_b: f32,
57    #[serde(default = "default_max_gsis_per_collection")]
58    pub max_gsis_per_collection: usize,
59    #[serde(default = "default_hll_m")]
60    pub hll_registers: usize,
61    #[serde(default = "default_hll_p")]
62    pub hll_precision: u32,
63}
64
65impl Default for SparseTuning {
66    fn default() -> Self {
67        Self {
68            bm25_k1: default_bm25_k1(),
69            bm25_b: default_bm25_b(),
70            max_gsis_per_collection: default_max_gsis_per_collection(),
71            hll_registers: default_hll_m(),
72            hll_precision: default_hll_p(),
73        }
74    }
75}
76
/// Fallback for the `bm25_k1` field: 1.2.
fn default_bm25_k1() -> f32 {
    1.2_f32
}

/// Fallback for the `bm25_b` field: 0.75.
fn default_bm25_b() -> f32 {
    0.75_f32
}

/// Fallback for the `max_gsis_per_collection` field: 4.
fn default_max_gsis_per_collection() -> usize {
    const MAX_GSIS: usize = 4;
    MAX_GSIS
}

/// Fallback for the `hll_registers` field: 256 (2^8).
fn default_hll_m() -> usize {
    1 << 8
}

/// Fallback for the `hll_precision` field: 8.
fn default_hll_p() -> u32 {
    const HLL_PRECISION: u32 = 8;
    HLL_PRECISION
}
92
/// Upper bound on the number of nodes a BFS traversal may visit by default.
///
/// Bounding the visited set keeps supernode fan-out from consuming
/// unbounded memory.
pub const DEFAULT_MAX_VISITED: usize = 100_000;

/// Deepest level a BFS traversal descends to by default.
pub const DEFAULT_MAX_DEPTH: usize = 10;
99
100/// Graph engine tuning (traversal limits, LCC algorithm).
101#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct GraphTuning {
103    #[serde(default = "default_max_visited")]
104    pub max_visited: usize,
105    #[serde(default = "default_max_depth")]
106    pub max_depth: usize,
107    #[serde(default = "default_lcc_high_degree_threshold")]
108    pub lcc_high_degree_threshold: usize,
109    #[serde(default = "default_lcc_sample_pairs")]
110    pub lcc_sample_pairs: usize,
111}
112
113impl Default for GraphTuning {
114    fn default() -> Self {
115        Self {
116            max_visited: default_max_visited(),
117            max_depth: default_max_depth(),
118            lcc_high_degree_threshold: default_lcc_high_degree_threshold(),
119            lcc_sample_pairs: default_lcc_sample_pairs(),
120        }
121    }
122}
123
124fn default_max_visited() -> usize {
125    DEFAULT_MAX_VISITED
126}
127fn default_max_depth() -> usize {
128    DEFAULT_MAX_DEPTH
129}
/// Fallback for the `lcc_high_degree_threshold` field: 2_000.
fn default_lcc_high_degree_threshold() -> usize {
    const HIGH_DEGREE_THRESHOLD: usize = 2_000;
    HIGH_DEGREE_THRESHOLD
}

/// Fallback for the `lcc_sample_pairs` field: 10_000.
fn default_lcc_sample_pairs() -> usize {
    const SAMPLE_PAIRS: usize = 10_000;
    SAMPLE_PAIRS
}
136
137/// Timeseries engine tuning (memtable budgets, block sizes).
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct TimeseriesToning {
140    #[serde(default = "default_memtable_budget_bytes")]
141    pub memtable_budget_bytes: usize,
142    #[serde(default = "default_total_budget_bytes")]
143    pub total_budget_bytes: usize,
144    #[serde(default = "default_ts_block_size")]
145    pub block_size: usize,
146}
147
148impl Default for TimeseriesToning {
149    fn default() -> Self {
150        Self {
151            memtable_budget_bytes: default_memtable_budget_bytes(),
152            total_budget_bytes: default_total_budget_bytes(),
153            block_size: default_ts_block_size(),
154        }
155    }
156}
157
/// Fallback for the `memtable_budget_bytes` field: 64 MiB.
fn default_memtable_budget_bytes() -> usize {
    const MIB: usize = 1 << 20;
    64 * MIB
}

/// Fallback for the `total_budget_bytes` field: 100 MiB.
fn default_total_budget_bytes() -> usize {
    const MIB: usize = 1 << 20;
    100 * MIB
}

/// Fallback for the `block_size` field: 1024.
fn default_ts_block_size() -> usize {
    1 << 10
}
167
168/// KV engine tuning (hash table, expiry wheel, slab allocator).
169///
170/// Controls the per-core hash table parameters, incremental rehash behavior,
171/// expiry wheel tick rate, and the per-tick reap budget that prevents reactor
172/// stalls during mass-expiry events.
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct KvTuning {
175    /// Default initial hash table capacity per collection (number of slots).
176    /// Should be a power of two. Larger values reduce early rehash churn for
177    /// collections that will grow quickly. Default: 16384.
178    #[serde(default = "default_kv_capacity")]
179    pub default_capacity: usize,
180
181    /// Hash table load factor threshold that triggers incremental rehash.
182    /// When `entries / capacity > rehash_load_factor`, the table begins
183    /// doubling. Range: 0.5–0.9. Default: 0.75 (standard Robin Hood threshold).
184    #[serde(default = "default_kv_rehash_load_factor")]
185    pub rehash_load_factor: f32,
186
187    /// Number of entries rehashed per PUT during incremental rehash.
188    /// Higher values complete rehash faster but add per-PUT latency.
189    /// Default: 8.
190    #[serde(default = "default_kv_rehash_batch_size")]
191    pub rehash_batch_size: usize,
192
193    /// Default inline value threshold in bytes. Values at or below this size
194    /// are stored directly in the hash entry (no pointer chase). Larger values
195    /// overflow to slab-allocated Binary Tuples. Default: 64.
196    #[serde(default = "default_kv_inline_threshold")]
197    pub default_inline_threshold: usize,
198
199    /// Maximum expirations processed per reactor tick (event loop iteration).
200    /// Prevents mass-expiry events (e.g., 10M keys with identical TTL) from
201    /// stalling the TPC core. Expired-but-not-yet-reaped keys are invisible
202    /// to GET (lazy fallback). Default: 1024.
203    #[serde(default = "default_kv_expiry_reap_budget")]
204    pub expiry_reap_budget: usize,
205
206    /// Expiry wheel tick interval in milliseconds. Determines the granularity
207    /// of TTL expiration. Lower = more precise but more CPU overhead.
208    /// Default: 1000 (1 second).
209    #[serde(default = "default_kv_expiry_tick_ms")]
210    pub expiry_tick_ms: u64,
211}
212
213impl Default for KvTuning {
214    fn default() -> Self {
215        Self {
216            default_capacity: default_kv_capacity(),
217            rehash_load_factor: default_kv_rehash_load_factor(),
218            rehash_batch_size: default_kv_rehash_batch_size(),
219            default_inline_threshold: default_kv_inline_threshold(),
220            expiry_reap_budget: default_kv_expiry_reap_budget(),
221            expiry_tick_ms: default_kv_expiry_tick_ms(),
222        }
223    }
224}
225
/// Fallback for the `default_capacity` field: 16_384 slots (2^14).
fn default_kv_capacity() -> usize {
    1 << 14
}

/// Fallback for the `rehash_load_factor` field: 0.75.
fn default_kv_rehash_load_factor() -> f32 {
    0.75_f32
}

/// Fallback for the `rehash_batch_size` field: 8 entries per PUT.
fn default_kv_rehash_batch_size() -> usize {
    const REHASH_BATCH: usize = 8;
    REHASH_BATCH
}

/// Fallback for the `default_inline_threshold` field: 64 bytes.
fn default_kv_inline_threshold() -> usize {
    const INLINE_BYTES: usize = 64;
    INLINE_BYTES
}

/// Fallback for the `expiry_reap_budget` field: 1024 expirations per tick.
fn default_kv_expiry_reap_budget() -> usize {
    1 << 10
}

/// Fallback for the `expiry_tick_ms` field: 1000 ms.
fn default_kv_expiry_tick_ms() -> u64 {
    const TICK_MS: u64 = 1_000;
    TICK_MS
}