Skip to main content

ember_core/
memory.rs

//! Memory tracking for the keyspace.
//!
//! Provides byte-level accounting of memory used by entries. Updated
//! on every mutation so the engine can enforce memory limits and
//! report stats without scanning the entire keyspace.
//!
//! # Platform notes
//!
//! Overhead constants are empirical estimates for 64-bit platforms (x86-64,
//! aarch64). On 32-bit systems these would be smaller; the effect is that
//! we'd overestimate memory usage, which triggers eviction earlier than
//! necessary but doesn't cause correctness issues.
//!
//! The constants assume Rust's standard library allocator. Custom allocators
//! (jemalloc, mimalloc) may have different per-allocation overhead.
//!
//! # Safety margin
//!
//! Because overhead constants are estimates and allocator fragmentation is
//! unpredictable, we apply a safety margin when enforcing memory limits.
//! The effective limit is set to [`MEMORY_SAFETY_MARGIN_PERCENT`]% of the
//! configured max, reserving headroom so the process doesn't OOM before
//! eviction has a chance to kick in.
25use crate::types::Value;
26
/// Percentage of the configured `max_memory` that is actually used as the
/// effective write limit. The remaining headroom absorbs allocator overhead,
/// internal fragmentation, and estimation error in our per-entry constants.
///
/// 90% is conservative — it means a server configured with 1 GB will start
/// rejecting writes (or evicting) at ~922 MB of estimated usage, leaving
/// ~100 MB of breathing room for the allocator.
pub const MEMORY_SAFETY_MARGIN_PERCENT: usize = 90;

/// Computes the effective memory limit after applying the safety margin.
///
/// Returns the number of bytes at which writes should be rejected or
/// eviction should begin — always less than the raw configured limit
/// (for any nonzero limit).
pub fn effective_limit(max_bytes: usize) -> usize {
    // Widen to u128 so the multiply cannot overflow even when max_bytes
    // is near usize::MAX; the quotient always fits back into usize.
    let scaled = (max_bytes as u128) * (MEMORY_SAFETY_MARGIN_PERCENT as u128);
    (scaled / 100) as usize
}
45
/// Estimated fixed overhead per entry in the keyspace HashMap, in bytes.
///
/// Accounts for: the String key struct (24 bytes ptr+len+cap), Entry struct
/// fields (Value enum tag + Bytes/collection inline storage + expires_at_ms
/// + last_access_ms), plus hashbrown per-entry bookkeeping (1 control byte
/// + empty slot waste at ~87.5% load factor).
///
/// This is calibrated from `std::mem::size_of` on 64-bit platforms. The
/// exact value varies by compiler version, but precision isn't critical —
/// we use this for eviction triggers and memory reporting, not correctness.
/// Overestimating is fine (triggers eviction earlier); underestimating could
/// let memory grow slightly beyond the configured limit.
///
/// The `entry_overhead_not_too_small` test validates this constant against
/// the actual struct sizes on each platform.
pub(crate) const ENTRY_OVERHEAD: usize = 128;
62
/// Tracks memory usage for a single keyspace.
///
/// All updates are explicit — callers must call `add` / `remove` on every
/// mutation. This avoids any hidden scanning cost.
///
/// Both counters are estimates (see module docs); they can drift if callers
/// miss a mutation, which is why the mutators saturate rather than panic.
#[derive(Debug)]
pub struct MemoryTracker {
    // Estimated bytes in use, as computed by `entry_size`.
    used_bytes: usize,
    // Number of live keys currently tracked.
    key_count: usize,
}
72
73impl MemoryTracker {
74    /// Creates a tracker with zero usage.
75    pub fn new() -> Self {
76        Self {
77            used_bytes: 0,
78            key_count: 0,
79        }
80    }
81
82    /// Resets tracking to zero. Used by FLUSHDB.
83    pub fn reset(&mut self) {
84        self.used_bytes = 0;
85        self.key_count = 0;
86    }
87
88    /// Returns the current estimated memory usage in bytes.
89    pub fn used_bytes(&self) -> usize {
90        self.used_bytes
91    }
92
93    /// Returns the number of tracked keys.
94    pub fn key_count(&self) -> usize {
95        self.key_count
96    }
97
98    /// Records the addition of a new entry.
99    pub fn add(&mut self, key: &str, value: &Value) {
100        self.used_bytes += entry_size(key, value);
101        self.key_count += 1;
102    }
103
104    /// Records the removal of an entry.
105    pub fn remove(&mut self, key: &str, value: &Value) {
106        let size = entry_size(key, value);
107        self.used_bytes = self.used_bytes.saturating_sub(size);
108        self.key_count = self.key_count.saturating_sub(1);
109    }
110
111    /// Adjusts tracking when a key's value is overwritten.
112    ///
113    /// Removes the old value's contribution and adds the new one.
114    /// Key count stays the same.
115    pub fn replace(&mut self, key: &str, old_value: &Value, new_value: &Value) {
116        let old_size = entry_size(key, old_value);
117        let new_size = entry_size(key, new_value);
118        self.used_bytes = self
119            .used_bytes
120            .saturating_sub(old_size)
121            .saturating_add(new_size);
122    }
123
124    /// Adjusts used bytes for an in-place mutation (e.g. list push/pop)
125    /// without changing the key count.
126    ///
127    /// `old_entry_size` and `new_entry_size` are the full entry sizes
128    /// (as returned by `entry_size`) before and after the mutation.
129    pub fn adjust(&mut self, old_entry_size: usize, new_entry_size: usize) {
130        self.used_bytes = self
131            .used_bytes
132            .saturating_sub(old_entry_size)
133            .saturating_add(new_entry_size);
134    }
135
136    /// Removes an entry with an explicit size, useful when the value has
137    /// already been mutated and the original size was captured beforehand.
138    pub fn remove_with_size(&mut self, size: usize) {
139        self.used_bytes = self.used_bytes.saturating_sub(size);
140        self.key_count = self.key_count.saturating_sub(1);
141    }
142}
143
144impl Default for MemoryTracker {
145    fn default() -> Self {
146        Self::new()
147    }
148}
149
/// Element count threshold at or below which values are dropped inline
/// rather than sent to the background drop thread. Strings are always
/// inline (Bytes::drop is O(1)), but collections with more than this many
/// elements get deferred.
pub const LAZY_FREE_THRESHOLD: usize = 64;
155
156/// Returns `true` if dropping this value is expensive enough to justify
157/// sending it to the background drop thread.
158///
159/// Strings are always cheap to drop (reference-counted `Bytes`).
160/// Collections are considered large when they exceed [`LAZY_FREE_THRESHOLD`]
161/// elements.
162pub fn is_large_value(value: &Value) -> bool {
163    match value {
164        Value::String(_) => false,
165        Value::List(d) => d.len() > LAZY_FREE_THRESHOLD,
166        Value::SortedSet(ss) => ss.len() > LAZY_FREE_THRESHOLD,
167        Value::Hash(m) => m.len() > LAZY_FREE_THRESHOLD,
168        Value::Set(s) => s.len() > LAZY_FREE_THRESHOLD,
169        // Vector sets contain usearch Index (C++ object) + hashmaps.
170        // Large sets should be deferred.
171        #[cfg(feature = "vector")]
172        Value::Vector(vs) => vs.len() > LAZY_FREE_THRESHOLD,
173        // Proto values use Bytes (ref-counted, O(1) drop) + a String.
174        // Neither is expensive to drop.
175        #[cfg(feature = "protobuf")]
176        Value::Proto { .. } => false,
177    }
178}
179
180/// Estimates the total memory footprint of a single entry.
181///
182/// key heap allocation + value bytes + fixed per-entry overhead.
183pub fn entry_size(key: &str, value: &Value) -> usize {
184    key.len() + value_size(value) + ENTRY_OVERHEAD
185}
186
/// Estimated overhead per element in a VecDeque, in bytes.
///
/// Each slot holds a `Bytes` (pointer + len + capacity = 24 bytes on 64-bit)
/// plus VecDeque's internal bookkeeping per slot.
pub(crate) const VECDEQUE_ELEMENT_OVERHEAD: usize = 32;

/// Base overhead for an empty VecDeque (internal buffer pointer + head/len).
pub(crate) const VECDEQUE_BASE_OVERHEAD: usize = 24;

/// Estimated overhead per entry in a HashMap (for the hash value type).
///
/// Each entry has: key String (24 bytes ptr+len+cap), value Bytes (24 bytes),
/// plus HashMap bucket overhead (~16 bytes for hash + next pointer).
pub(crate) const HASHMAP_ENTRY_OVERHEAD: usize = 64;

/// Base overhead for an empty HashMap (bucket array pointer + len + capacity).
pub(crate) const HASHMAP_BASE_OVERHEAD: usize = 48;

/// Estimated overhead per member in a HashSet.
///
/// Each member is a String (24 bytes ptr+len+cap) plus bucket overhead.
pub(crate) const HASHSET_MEMBER_OVERHEAD: usize = 40;

/// Base overhead for an empty HashSet (bucket array pointer + len + capacity).
pub(crate) const HASHSET_BASE_OVERHEAD: usize = 48;
212
213/// Returns the byte size of a value's payload.
214pub fn value_size(value: &Value) -> usize {
215    match value {
216        Value::String(data) => data.len(),
217        Value::List(deque) => {
218            let element_bytes: usize = deque
219                .iter()
220                .map(|b| b.len() + VECDEQUE_ELEMENT_OVERHEAD)
221                .sum();
222            VECDEQUE_BASE_OVERHEAD + element_bytes
223        }
224        Value::SortedSet(ss) => ss.memory_usage(),
225        Value::Hash(map) => {
226            let entry_bytes: usize = map
227                .iter()
228                .map(|(k, v)| k.len() + v.len() + HASHMAP_ENTRY_OVERHEAD)
229                .sum();
230            HASHMAP_BASE_OVERHEAD + entry_bytes
231        }
232        Value::Set(set) => {
233            let member_bytes: usize = set.iter().map(|m| m.len() + HASHSET_MEMBER_OVERHEAD).sum();
234            HASHSET_BASE_OVERHEAD + member_bytes
235        }
236        #[cfg(feature = "vector")]
237        Value::Vector(vs) => vs.memory_usage(),
238        // type_name: String struct = 24 bytes (ptr+len+cap) on 64-bit.
239        // data: Bytes struct = ~24 bytes (ptr+len+vtable/arc).
240        #[cfg(feature = "protobuf")]
241        Value::Proto { type_name, data } => type_name.len() + data.len() + 48,
242    }
243}
244
// Unit tests for the tracker, size estimation, and lazy-free heuristics.
// NOTE(review): `adjust` and `remove_with_size` currently have no direct
// coverage — worth adding tests for them.
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;

    // Helper: wraps a &str in a Value::String for terse test bodies.
    fn string_val(s: &str) -> Value {
        Value::String(Bytes::from(s.to_string()))
    }

    #[test]
    fn new_tracker_is_empty() {
        let t = MemoryTracker::new();
        assert_eq!(t.used_bytes(), 0);
        assert_eq!(t.key_count(), 0);
    }

    #[test]
    fn add_increases_usage() {
        let mut t = MemoryTracker::new();
        let val = string_val("hello");
        t.add("key", &val);
        assert_eq!(t.key_count(), 1);
        assert_eq!(t.used_bytes(), entry_size("key", &val));
    }

    #[test]
    fn remove_decreases_usage() {
        let mut t = MemoryTracker::new();
        let val = string_val("data");
        t.add("k", &val);
        t.remove("k", &val);
        assert_eq!(t.used_bytes(), 0);
        assert_eq!(t.key_count(), 0);
    }

    #[test]
    fn replace_adjusts_usage() {
        let mut t = MemoryTracker::new();
        let old = string_val("short");
        let new = string_val("a much longer value here");
        t.add("k", &old);

        let before = t.used_bytes();
        t.replace("k", &old, &new);

        // replace must not touch the key count
        assert_eq!(t.key_count(), 1);
        // new value is longer, so usage should increase
        assert!(t.used_bytes() > before);
        assert_eq!(t.used_bytes(), entry_size("k", &new),);
    }

    #[test]
    fn remove_saturates_at_zero() {
        let mut t = MemoryTracker::new();
        let val = string_val("x");
        // remove without add — should not underflow
        t.remove("k", &val);
        assert_eq!(t.used_bytes(), 0);
        assert_eq!(t.key_count(), 0);
    }

    #[test]
    fn entry_size_accounts_for_key_and_value() {
        let val = string_val("test");
        let size = entry_size("mykey", &val);
        // 5 (key) + 4 (value) + ENTRY_OVERHEAD
        assert_eq!(size, 5 + 4 + ENTRY_OVERHEAD);
    }

    /// Validates that ENTRY_OVERHEAD is at least as large as the actual
    /// struct sizes, so we never underestimate memory usage.
    #[test]
    fn entry_overhead_not_too_small() {
        use crate::keyspace::Entry;

        let entry_size = std::mem::size_of::<Entry>();
        let key_struct_size = std::mem::size_of::<String>();
        // hashbrown uses 1 control byte per slot + ~14% empty slot waste.
        // 8 bytes is a conservative lower bound for per-entry hash overhead.
        let hashmap_per_entry = 8;
        let minimum = entry_size + key_struct_size + hashmap_per_entry;

        assert!(
            ENTRY_OVERHEAD >= minimum,
            "ENTRY_OVERHEAD ({ENTRY_OVERHEAD}) is less than measured minimum \
             ({minimum} = Entry({entry_size}) + String({key_struct_size}) + \
             hashmap({hashmap_per_entry}))"
        );
    }

    #[test]
    fn list_value_size() {
        let mut deque = std::collections::VecDeque::new();
        deque.push_back(Bytes::from("hello"));
        deque.push_back(Bytes::from("world"));
        let val = Value::List(deque);

        let size = value_size(&val);
        // base overhead + 2 elements (each: data len + element overhead)
        let expected = VECDEQUE_BASE_OVERHEAD
            + (5 + VECDEQUE_ELEMENT_OVERHEAD)
            + (5 + VECDEQUE_ELEMENT_OVERHEAD);
        assert_eq!(size, expected);
    }

    #[test]
    fn empty_list_value_size() {
        let val = Value::List(std::collections::VecDeque::new());
        assert_eq!(value_size(&val), VECDEQUE_BASE_OVERHEAD);
    }

    #[test]
    fn multiple_entries() {
        let mut t = MemoryTracker::new();
        let v1 = string_val("aaa");
        let v2 = string_val("bbbbb");
        t.add("k1", &v1);
        t.add("k2", &v2);

        assert_eq!(t.key_count(), 2);
        assert_eq!(
            t.used_bytes(),
            entry_size("k1", &v1) + entry_size("k2", &v2),
        );

        t.remove("k1", &v1);
        assert_eq!(t.key_count(), 1);
        assert_eq!(t.used_bytes(), entry_size("k2", &v2));
    }

    #[test]
    fn effective_limit_applies_margin() {
        // 1000 bytes configured → 900 effective at 90%
        assert_eq!(effective_limit(1000), 900);
    }

    #[test]
    fn effective_limit_rounds_down() {
        // 1001 * 90 / 100 = 900 (integer division truncates)
        assert_eq!(effective_limit(1001), 900);
    }

    #[test]
    fn effective_limit_zero() {
        assert_eq!(effective_limit(0), 0);
    }

    #[test]
    fn string_is_never_large() {
        // even a 10 KB string is O(1) to drop (ref-counted Bytes)
        let val = Value::String(Bytes::from(vec![0u8; 10_000]));
        assert!(!is_large_value(&val));
    }

    #[test]
    fn small_list_is_not_large() {
        // exactly LAZY_FREE_THRESHOLD elements — boundary is inclusive
        let mut d = std::collections::VecDeque::new();
        for _ in 0..LAZY_FREE_THRESHOLD {
            d.push_back(Bytes::from("x"));
        }
        assert!(!is_large_value(&Value::List(d)));
    }

    #[test]
    fn big_list_is_large() {
        // LAZY_FREE_THRESHOLD + 1 elements crosses the boundary
        let mut d = std::collections::VecDeque::new();
        for _ in 0..=LAZY_FREE_THRESHOLD {
            d.push_back(Bytes::from("x"));
        }
        assert!(is_large_value(&Value::List(d)));
    }

    #[test]
    fn big_hash_is_large() {
        let mut m = std::collections::HashMap::new();
        for i in 0..=LAZY_FREE_THRESHOLD {
            m.insert(format!("f{i}"), Bytes::from("v"));
        }
        assert!(is_large_value(&Value::Hash(m)));
    }

    #[test]
    fn big_set_is_large() {
        let mut s = std::collections::HashSet::new();
        for i in 0..=LAZY_FREE_THRESHOLD {
            s.insert(format!("m{i}"));
        }
        assert!(is_large_value(&Value::Set(s)));
    }
}