// ember_core/memory.rs
//! Memory tracking for the keyspace.
//!
//! Provides byte-level accounting of memory used by entries. Updated
//! on every mutation so the engine can enforce memory limits and
//! report stats without scanning the entire keyspace.
//!
//! # Platform notes
//!
//! Overhead constants are empirical estimates for 64-bit platforms (x86-64,
//! aarch64). On 32-bit systems these would be smaller; the effect is that
//! we'd overestimate memory usage, which triggers eviction earlier than
//! necessary but doesn't cause correctness issues.
//!
//! The constants assume Rust's standard library allocator. Custom allocators
//! (jemalloc, mimalloc) may have different per-allocation overhead.
//!
//! # Safety margin
//!
//! Because overhead constants are estimates and allocator fragmentation is
//! unpredictable, we apply a safety margin when enforcing memory limits.
//! The effective limit is set to [`MEMORY_SAFETY_MARGIN_PERCENT`]% of the
//! configured max, reserving headroom so the process doesn't OOM before
//! eviction has a chance to kick in.

25use crate::types::Value;
26
/// Percentage of the configured `max_memory` that we actually use as the
/// effective write limit. The remaining headroom absorbs allocator overhead,
/// internal fragmentation, and estimation error in our per-entry constants.
///
/// 90% is conservative — a server configured with 1 GB starts rejecting
/// writes (or evicting) at ~922 MB of estimated usage, leaving roughly
/// 100 MB of breathing room for the allocator.
pub const MEMORY_SAFETY_MARGIN_PERCENT: usize = 90;

/// Computes the effective memory limit after applying the safety margin.
///
/// Returns the number of bytes at which writes should be rejected or
/// eviction should begin — always less than the raw configured limit.
pub fn effective_limit(max_bytes: usize) -> usize {
    // Widen to u128 before multiplying so the product cannot overflow even
    // when max_bytes is near usize::MAX; the truncating division rounds
    // down, which errs on the conservative (earlier-eviction) side.
    let scaled = max_bytes as u128 * MEMORY_SAFETY_MARGIN_PERCENT as u128;
    (scaled / 100) as usize
}
45
/// Estimated fixed overhead, in bytes, per entry in the keyspace HashMap.
///
/// Accounts for: the CompactString key (24 bytes inline on 64-bit),
/// Entry struct fields (Value enum tag + Bytes/collection inline storage,
/// expires_at_ms, cached_value_size, last_access_secs), plus hashbrown
/// per-entry bookkeeping (1 control byte + empty slot waste at ~87.5%
/// load factor).
///
/// History — reduced from 128 to 104 after structural optimizations:
/// - moved 8-byte `version` field to a lazy side table
/// - packed `cached_value_size` as u32 (was usize)
/// - bumped from 100 to 104 for CI platforms where Entry is 72 bytes
///
/// This is calibrated from `std::mem::size_of` on 64-bit platforms. The
/// exact value varies by compiler version, but precision isn't critical —
/// we use this for eviction triggers and memory reporting, not correctness.
/// Overestimating is fine (triggers eviction earlier); underestimating could
/// let memory grow slightly beyond the configured limit.
///
/// The `entry_overhead_not_too_small` test validates this constant against
/// the actual struct sizes on each platform.
pub(crate) const ENTRY_OVERHEAD: usize = 104;
68
/// Tracks memory usage for a single keyspace.
///
/// All updates are explicit — callers must call `add` / `remove` on every
/// mutation. This avoids any hidden scanning cost.
#[derive(Debug)]
pub struct MemoryTracker {
    // Estimated bytes used by all tracked entries: key bytes + value
    // payload + fixed per-entry overhead, as computed by `entry_size`.
    used_bytes: usize,
    // Number of live keys; incremented by `add`, decremented by `remove`
    // and `remove_with_size`, unchanged by `replace`/`adjust`.
    key_count: usize,
}
78
79impl MemoryTracker {
80    /// Creates a tracker with zero usage.
81    pub fn new() -> Self {
82        Self {
83            used_bytes: 0,
84            key_count: 0,
85        }
86    }
87
88    /// Resets tracking to zero. Used by FLUSHDB.
89    pub fn reset(&mut self) {
90        self.used_bytes = 0;
91        self.key_count = 0;
92    }
93
94    /// Returns the current estimated memory usage in bytes.
95    pub fn used_bytes(&self) -> usize {
96        self.used_bytes
97    }
98
99    /// Returns the number of tracked keys.
100    pub fn key_count(&self) -> usize {
101        self.key_count
102    }
103
104    /// Records the addition of a new entry.
105    pub fn add(&mut self, key: &str, value: &Value) {
106        self.used_bytes += entry_size(key, value);
107        self.key_count += 1;
108    }
109
110    /// Records the removal of an entry.
111    pub fn remove(&mut self, key: &str, value: &Value) {
112        let size = entry_size(key, value);
113        self.used_bytes = self.used_bytes.saturating_sub(size);
114        self.key_count = self.key_count.saturating_sub(1);
115    }
116
117    /// Adjusts tracking when a key's value is overwritten.
118    ///
119    /// Removes the old value's contribution and adds the new one.
120    /// Key count stays the same.
121    pub fn replace(&mut self, key: &str, old_value: &Value, new_value: &Value) {
122        let old_size = entry_size(key, old_value);
123        let new_size = entry_size(key, new_value);
124        self.used_bytes = self
125            .used_bytes
126            .saturating_sub(old_size)
127            .saturating_add(new_size);
128    }
129
130    /// Adjusts used bytes for an in-place mutation (e.g. list push/pop)
131    /// without changing the key count.
132    ///
133    /// `old_entry_size` and `new_entry_size` are the full entry sizes
134    /// (as returned by `entry_size`) before and after the mutation.
135    pub fn adjust(&mut self, old_entry_size: usize, new_entry_size: usize) {
136        self.used_bytes = self
137            .used_bytes
138            .saturating_sub(old_entry_size)
139            .saturating_add(new_entry_size);
140    }
141
142    /// Increases used bytes by `delta` without scanning the entry.
143    ///
144    /// Use this when the caller already knows the exact number of bytes
145    /// being added (e.g. list push where element sizes are precomputed).
146    /// Does not change the key count.
147    pub fn grow_by(&mut self, delta: usize) {
148        self.used_bytes = self.used_bytes.saturating_add(delta);
149    }
150
151    /// Decreases used bytes by `delta` without scanning the entry.
152    ///
153    /// Use this when the caller already knows the exact number of bytes
154    /// being removed (e.g. list pop where the popped element length is known).
155    /// Does not change the key count.
156    pub fn shrink_by(&mut self, delta: usize) {
157        self.used_bytes = self.used_bytes.saturating_sub(delta);
158    }
159
160    /// Removes an entry with an explicit size, useful when the value has
161    /// already been mutated and the original size was captured beforehand.
162    pub fn remove_with_size(&mut self, size: usize) {
163        self.used_bytes = self.used_bytes.saturating_sub(size);
164        self.key_count = self.key_count.saturating_sub(1);
165    }
166}
167
168impl Default for MemoryTracker {
169    fn default() -> Self {
170        Self::new()
171    }
172}
173
/// Element count threshold at or below which values are dropped inline
/// rather than sent to the background drop thread. Strings are always
/// dropped inline (Bytes::drop is O(1)); collections with strictly more
/// than this many elements get deferred (the comparison in
/// `is_large_value` is `len > LAZY_FREE_THRESHOLD`).
pub const LAZY_FREE_THRESHOLD: usize = 64;
179
180/// Returns `true` if dropping this value is expensive enough to justify
181/// sending it to the background drop thread.
182///
183/// Strings are always cheap to drop (reference-counted `Bytes`).
184/// Collections are considered large when they exceed [`LAZY_FREE_THRESHOLD`]
185/// elements.
186pub fn is_large_value(value: &Value) -> bool {
187    match value {
188        Value::String(_) => false,
189        Value::List(d) => d.len() > LAZY_FREE_THRESHOLD,
190        Value::SortedSet(ss) => ss.len() > LAZY_FREE_THRESHOLD,
191        Value::Hash(m) => m.len() > LAZY_FREE_THRESHOLD,
192        Value::Set(s) => s.len() > LAZY_FREE_THRESHOLD,
193        // Vector sets contain usearch Index (C++ object) + hashmaps.
194        // Large sets should be deferred.
195        #[cfg(feature = "vector")]
196        Value::Vector(vs) => vs.len() > LAZY_FREE_THRESHOLD,
197        // Proto values use Bytes (ref-counted, O(1) drop) + a String.
198        // Neither is expensive to drop.
199        #[cfg(feature = "protobuf")]
200        Value::Proto { .. } => false,
201    }
202}
203
204/// Estimates the total memory footprint of a single entry.
205///
206/// key heap allocation + value bytes + fixed per-entry overhead.
207pub fn entry_size(key: &str, value: &Value) -> usize {
208    key.len() + value_size(value) + ENTRY_OVERHEAD
209}
210
/// Estimated overhead per element in a VecDeque.
///
/// Each slot holds a `Bytes` (pointer + len + capacity = 24 bytes on 64-bit)
/// plus VecDeque's internal bookkeeping per slot.
pub(crate) const VECDEQUE_ELEMENT_OVERHEAD: usize = 32;

/// Base overhead for an empty VecDeque (internal buffer pointer + head/len).
pub(crate) const VECDEQUE_BASE_OVERHEAD: usize = 24;

/// Estimated overhead per entry in the packed hash buffer.
///
/// Each field is framed with a 2-byte name length and 4-byte value length.
/// No per-field heap allocation — everything is contiguous in the buffer.
/// Note the framing bytes live *inside* the buffer, so `value_size` does
/// not add this constant on top of the buffer length.
pub(crate) const PACKED_HASH_ENTRY_OVERHEAD: usize = 6;

/// Base overhead for the packed hash buffer.
///
/// Vec struct (ptr + len + cap = 24 bytes) plus the 2-byte field count header
/// (the header itself is stored in the buffer; see `value_size`).
pub(crate) const PACKED_HASH_BASE_OVERHEAD: usize = 26;

/// Estimated overhead per entry in a HashMap (for large hashes).
///
/// Each entry has: key CompactString (24 bytes), value Bytes (24 bytes),
/// plus HashMap bucket overhead (~16 bytes for hash + next pointer).
pub(crate) const HASHMAP_ENTRY_OVERHEAD: usize = 64;

/// Base overhead for an empty HashMap (bucket array pointer + len + capacity).
pub(crate) const HASHMAP_BASE_OVERHEAD: usize = 48;

/// Estimated overhead per member in a HashSet.
///
/// Each member is a String (24 bytes ptr+len+cap) plus bucket overhead.
pub(crate) const HASHSET_MEMBER_OVERHEAD: usize = 40;

/// Base overhead for an empty HashSet (bucket array pointer + len + capacity).
pub(crate) const HASHSET_BASE_OVERHEAD: usize = 48;
247
248/// Returns the byte size of a value's payload.
249pub fn value_size(value: &Value) -> usize {
250    match value {
251        Value::String(data) => data.len(),
252        Value::List(deque) => {
253            let element_bytes: usize = deque
254                .iter()
255                .map(|b| b.len() + VECDEQUE_ELEMENT_OVERHEAD)
256                .sum();
257            VECDEQUE_BASE_OVERHEAD + element_bytes
258        }
259        Value::SortedSet(ss) => ss.memory_usage(),
260        Value::Hash(hash) => {
261            use crate::types::hash::HashValue;
262            match hash.as_ref() {
263                HashValue::Packed(buf) => {
264                    // Vec struct overhead (24 bytes) + buffer contents
265                    24 + buf.len()
266                }
267                HashValue::Full(map) => {
268                    let entry_bytes: usize = map
269                        .iter()
270                        .map(|(k, v)| k.len() + v.len() + HASHMAP_ENTRY_OVERHEAD)
271                        .sum();
272                    HASHMAP_BASE_OVERHEAD + entry_bytes
273                }
274            }
275        }
276        Value::Set(set) => {
277            let member_bytes: usize = set.iter().map(|m| m.len() + HASHSET_MEMBER_OVERHEAD).sum();
278            HASHSET_BASE_OVERHEAD + member_bytes
279        }
280        #[cfg(feature = "vector")]
281        Value::Vector(vs) => vs.memory_usage(),
282        // type_name: String struct = 24 bytes (ptr+len+cap) on 64-bit.
283        // data: Bytes struct = ~24 bytes (ptr+len+vtable/arc).
284        #[cfg(feature = "protobuf")]
285        Value::Proto { type_name, data } => type_name.len() + data.len() + 48,
286    }
287}
288
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;

    // Convenience constructor: wraps a &str in the String variant.
    fn string_val(s: &str) -> Value {
        Value::String(Bytes::from(s.to_string()))
    }

    #[test]
    fn new_tracker_is_empty() {
        let t = MemoryTracker::new();
        assert_eq!(t.used_bytes(), 0);
        assert_eq!(t.key_count(), 0);
    }

    #[test]
    fn add_increases_usage() {
        let mut t = MemoryTracker::new();
        let val = string_val("hello");
        t.add("key", &val);
        assert_eq!(t.key_count(), 1);
        // Usage must match exactly what entry_size reports for the same pair.
        assert_eq!(t.used_bytes(), entry_size("key", &val));
    }

    #[test]
    fn remove_decreases_usage() {
        let mut t = MemoryTracker::new();
        let val = string_val("data");
        t.add("k", &val);
        t.remove("k", &val);
        // Add followed by a matching remove returns the tracker to zero.
        assert_eq!(t.used_bytes(), 0);
        assert_eq!(t.key_count(), 0);
    }

    #[test]
    fn replace_adjusts_usage() {
        let mut t = MemoryTracker::new();
        let old = string_val("short");
        let new = string_val("a much longer value here");
        t.add("k", &old);

        let before = t.used_bytes();
        t.replace("k", &old, &new);

        // replace never touches the key count.
        assert_eq!(t.key_count(), 1);
        // new value is longer, so usage should increase
        assert!(t.used_bytes() > before);
        assert_eq!(t.used_bytes(), entry_size("k", &new),);
    }

    #[test]
    fn remove_saturates_at_zero() {
        let mut t = MemoryTracker::new();
        let val = string_val("x");
        // remove without add — should not underflow
        t.remove("k", &val);
        assert_eq!(t.used_bytes(), 0);
        assert_eq!(t.key_count(), 0);
    }

    #[test]
    fn entry_size_accounts_for_key_and_value() {
        let val = string_val("test");
        let size = entry_size("mykey", &val);
        // 5 (key) + 4 (value) + ENTRY_OVERHEAD
        assert_eq!(size, 5 + 4 + ENTRY_OVERHEAD);
    }

    /// Validates that ENTRY_OVERHEAD is at least as large as the actual
    /// struct sizes, so we never underestimate memory usage.
    #[test]
    fn entry_overhead_not_too_small() {
        use crate::keyspace::Entry;
        use compact_str::CompactString;

        let entry_size = std::mem::size_of::<Entry>();
        let key_struct_size = std::mem::size_of::<CompactString>();
        // hashbrown uses 1 control byte per slot + ~14% empty slot waste.
        // 8 bytes is a conservative lower bound for per-entry hash overhead.
        let hashmap_per_entry = 8;
        let minimum = entry_size + key_struct_size + hashmap_per_entry;

        assert!(
            ENTRY_OVERHEAD >= minimum,
            "ENTRY_OVERHEAD ({ENTRY_OVERHEAD}) is less than measured minimum \
             ({minimum} = Entry({entry_size}) + CompactString({key_struct_size}) + \
             hashmap({hashmap_per_entry}))"
        );
    }

    #[test]
    fn list_value_size() {
        let mut deque = std::collections::VecDeque::new();
        deque.push_back(Bytes::from("hello"));
        deque.push_back(Bytes::from("world"));
        let val = Value::List(deque);

        let size = value_size(&val);
        // base overhead + 2 elements (each: data len + element overhead)
        let expected = VECDEQUE_BASE_OVERHEAD
            + (5 + VECDEQUE_ELEMENT_OVERHEAD)
            + (5 + VECDEQUE_ELEMENT_OVERHEAD);
        assert_eq!(size, expected);
    }

    #[test]
    fn empty_list_value_size() {
        // An empty list still costs the VecDeque struct itself.
        let val = Value::List(std::collections::VecDeque::new());
        assert_eq!(value_size(&val), VECDEQUE_BASE_OVERHEAD);
    }

    #[test]
    fn multiple_entries() {
        let mut t = MemoryTracker::new();
        let v1 = string_val("aaa");
        let v2 = string_val("bbbbb");
        t.add("k1", &v1);
        t.add("k2", &v2);

        // Usage is additive across independent entries.
        assert_eq!(t.key_count(), 2);
        assert_eq!(
            t.used_bytes(),
            entry_size("k1", &v1) + entry_size("k2", &v2),
        );

        t.remove("k1", &v1);
        assert_eq!(t.key_count(), 1);
        assert_eq!(t.used_bytes(), entry_size("k2", &v2));
    }

    #[test]
    fn effective_limit_applies_margin() {
        // 1000 bytes configured → 900 effective at 90%
        assert_eq!(effective_limit(1000), 900);
    }

    #[test]
    fn effective_limit_rounds_down() {
        // 1001 * 90 / 100 = 900 (integer division truncates)
        assert_eq!(effective_limit(1001), 900);
    }

    #[test]
    fn effective_limit_zero() {
        assert_eq!(effective_limit(0), 0);
    }

    #[test]
    fn string_is_never_large() {
        // Even a 10 KB string is cheap to drop (refcounted Bytes).
        let val = Value::String(Bytes::from(vec![0u8; 10_000]));
        assert!(!is_large_value(&val));
    }

    #[test]
    fn small_list_is_not_large() {
        // Exactly LAZY_FREE_THRESHOLD elements: comparison is strict >,
        // so this list is still dropped inline.
        let mut d = std::collections::VecDeque::new();
        for _ in 0..LAZY_FREE_THRESHOLD {
            d.push_back(Bytes::from("x"));
        }
        assert!(!is_large_value(&Value::List(d)));
    }

    #[test]
    fn big_list_is_large() {
        // One element over the threshold tips it into deferred drop.
        let mut d = std::collections::VecDeque::new();
        for _ in 0..=LAZY_FREE_THRESHOLD {
            d.push_back(Bytes::from("x"));
        }
        assert!(is_large_value(&Value::List(d)));
    }

    #[test]
    fn big_hash_is_large() {
        use crate::types::hash::HashValue;
        let mut h = HashValue::default();
        for i in 0..=LAZY_FREE_THRESHOLD {
            h.insert(format!("f{i}").into(), Bytes::from("v"));
        }
        assert!(is_large_value(&Value::Hash(Box::new(h))));
    }

    #[test]
    fn big_set_is_large() {
        let mut s = std::collections::HashSet::new();
        for i in 0..=LAZY_FREE_THRESHOLD {
            s.insert(format!("m{i}"));
        }
        assert!(is_large_value(&Value::Set(Box::new(s))));
    }
}
479}