Skip to main content

ember_core/
memory.rs

//! Memory tracking for the keyspace.
//!
//! Provides byte-level accounting of memory used by entries. Updated
//! on every mutation so the engine can enforce memory limits and
//! report stats without scanning the entire keyspace.
//!
//! # Platform notes
//!
//! Overhead constants are empirical estimates for 64-bit platforms (x86-64,
//! aarch64). On 32-bit systems these would be smaller; the effect is that
//! we'd overestimate memory usage, which triggers eviction earlier than
//! necessary but doesn't cause correctness issues.
//!
//! The constants assume Rust's standard library allocator. Custom allocators
//! (jemalloc, mimalloc) may have different per-allocation overhead.
//!
//! # Safety margin
//!
//! Because overhead constants are estimates and allocator fragmentation is
//! unpredictable, we apply a safety margin when enforcing memory limits.
//! The effective limit is set to [`MEMORY_SAFETY_MARGIN_PERCENT`]% of the
//! configured max, reserving headroom so the process doesn't OOM before
//! eviction has a chance to kick in.

use crate::types::Value;

/// Percentage of the configured `max_memory` that we actually use as the
/// effective write limit. The remaining headroom absorbs allocator overhead,
/// internal fragmentation, and estimation error in our per-entry constants.
///
/// 90% is conservative — it means a server configured with 1 GB will start
/// rejecting writes (or evicting) at ~922 MB of estimated usage, leaving
/// ~100 MB of breathing room for the allocator.
pub const MEMORY_SAFETY_MARGIN_PERCENT: usize = 90;

/// Computes the effective memory limit after applying the safety margin.
///
/// Returns the number of bytes at which writes should be rejected or
/// eviction should begin — always less than the raw configured limit
/// (except for an input of 0, which stays 0).
pub fn effective_limit(max_bytes: usize) -> usize {
    // Widen to u128 before multiplying: `max_bytes * 90` would overflow
    // usize for configured limits above usize::MAX / 90 — a panic in debug
    // builds, and a silent wrap to a near-zero limit in release builds.
    // The quotient is at most `max_bytes`, so the cast back is lossless.
    (max_bytes as u128 * MEMORY_SAFETY_MARGIN_PERCENT as u128 / 100) as usize
}
43
/// Estimated fixed overhead per entry in the keyspace HashMap, in bytes.
///
/// Accounts for: HashMap bucket pointer (8), Entry struct fields
/// (Option<Instant> = 16, last_access Instant = 8, Value enum tag + padding),
/// plus HashMap per-entry bookkeeping.
///
/// This is an approximation measured empirically on x86-64 linux. The exact
/// value varies by platform and compiler version, but precision isn't critical —
/// we use this for eviction triggers and memory reporting, not for correctness.
/// Overestimating is fine (triggers eviction earlier); underestimating could
/// theoretically let memory grow slightly beyond the configured limit.
pub(crate) const ENTRY_OVERHEAD: usize = 96;
56
/// Tracks memory usage for a single keyspace.
///
/// All updates are explicit — callers must call `add` / `remove` on every
/// mutation. This avoids any hidden scanning cost.
#[derive(Debug)]
pub struct MemoryTracker {
    // Estimated total bytes in use: key + value payload + per-entry overhead,
    // as computed by `entry_size`. An estimate, not an allocator-exact figure.
    used_bytes: usize,
    // Number of live keys currently accounted for.
    key_count: usize,
}
66
67impl MemoryTracker {
68    /// Creates a tracker with zero usage.
69    pub fn new() -> Self {
70        Self {
71            used_bytes: 0,
72            key_count: 0,
73        }
74    }
75
76    /// Resets tracking to zero. Used by FLUSHDB.
77    pub fn reset(&mut self) {
78        self.used_bytes = 0;
79        self.key_count = 0;
80    }
81
82    /// Returns the current estimated memory usage in bytes.
83    pub fn used_bytes(&self) -> usize {
84        self.used_bytes
85    }
86
87    /// Returns the number of tracked keys.
88    pub fn key_count(&self) -> usize {
89        self.key_count
90    }
91
92    /// Records the addition of a new entry.
93    pub fn add(&mut self, key: &str, value: &Value) {
94        self.used_bytes += entry_size(key, value);
95        self.key_count += 1;
96    }
97
98    /// Records the removal of an entry.
99    pub fn remove(&mut self, key: &str, value: &Value) {
100        let size = entry_size(key, value);
101        self.used_bytes = self.used_bytes.saturating_sub(size);
102        self.key_count = self.key_count.saturating_sub(1);
103    }
104
105    /// Adjusts tracking when a key's value is overwritten.
106    ///
107    /// Removes the old value's contribution and adds the new one.
108    /// Key count stays the same.
109    pub fn replace(&mut self, key: &str, old_value: &Value, new_value: &Value) {
110        let old_size = entry_size(key, old_value);
111        let new_size = entry_size(key, new_value);
112        self.used_bytes = self
113            .used_bytes
114            .saturating_sub(old_size)
115            .saturating_add(new_size);
116    }
117
118    /// Adjusts used bytes for an in-place mutation (e.g. list push/pop)
119    /// without changing the key count.
120    ///
121    /// `old_entry_size` and `new_entry_size` are the full entry sizes
122    /// (as returned by `entry_size`) before and after the mutation.
123    pub fn adjust(&mut self, old_entry_size: usize, new_entry_size: usize) {
124        self.used_bytes = self
125            .used_bytes
126            .saturating_sub(old_entry_size)
127            .saturating_add(new_entry_size);
128    }
129
130    /// Removes an entry with an explicit size, useful when the value has
131    /// already been mutated and the original size was captured beforehand.
132    pub fn remove_with_size(&mut self, size: usize) {
133        self.used_bytes = self.used_bytes.saturating_sub(size);
134        self.key_count = self.key_count.saturating_sub(1);
135    }
136}
137
138impl Default for MemoryTracker {
139    fn default() -> Self {
140        Self::new()
141    }
142}
143
/// Element count threshold at or below which values are dropped inline
/// rather than sent to the background drop thread. Strings are always
/// inline (Bytes::drop is O(1)), but collections with more than this many
/// elements get deferred (the comparison in `is_large_value` is strict `>`).
pub const LAZY_FREE_THRESHOLD: usize = 64;
149
150/// Returns `true` if dropping this value is expensive enough to justify
151/// sending it to the background drop thread.
152///
153/// Strings are always cheap to drop (reference-counted `Bytes`).
154/// Collections are considered large when they exceed [`LAZY_FREE_THRESHOLD`]
155/// elements.
156pub fn is_large_value(value: &Value) -> bool {
157    match value {
158        Value::String(_) => false,
159        Value::List(d) => d.len() > LAZY_FREE_THRESHOLD,
160        Value::SortedSet(ss) => ss.len() > LAZY_FREE_THRESHOLD,
161        Value::Hash(m) => m.len() > LAZY_FREE_THRESHOLD,
162        Value::Set(s) => s.len() > LAZY_FREE_THRESHOLD,
163    }
164}
165
166/// Estimates the total memory footprint of a single entry.
167///
168/// key heap allocation + value bytes + fixed per-entry overhead.
169pub fn entry_size(key: &str, value: &Value) -> usize {
170    key.len() + value_size(value) + ENTRY_OVERHEAD
171}
172
/// Estimated overhead per element in a VecDeque, in bytes.
///
/// Each slot holds a `Bytes` (pointer + len + capacity = 24 bytes on 64-bit)
/// plus VecDeque's internal bookkeeping per slot.
pub(crate) const VECDEQUE_ELEMENT_OVERHEAD: usize = 32;

/// Base overhead for an empty VecDeque (internal buffer pointer + head/len).
pub(crate) const VECDEQUE_BASE_OVERHEAD: usize = 24;

/// Estimated overhead per entry in a HashMap (for the hash value type).
///
/// Each entry has: key String (24 bytes ptr+len+cap), value Bytes (24 bytes),
/// plus HashMap bucket overhead (~16 bytes for hash + next pointer).
pub(crate) const HASHMAP_ENTRY_OVERHEAD: usize = 64;

/// Base overhead for an empty HashMap (bucket array pointer + len + capacity).
pub(crate) const HASHMAP_BASE_OVERHEAD: usize = 48;

/// Estimated overhead per member in a HashSet.
///
/// Each member is a String (24 bytes ptr+len+cap) plus bucket overhead.
pub(crate) const HASHSET_MEMBER_OVERHEAD: usize = 40;

/// Base overhead for an empty HashSet (bucket array pointer + len + capacity).
pub(crate) const HASHSET_BASE_OVERHEAD: usize = 48;
198
199/// Returns the byte size of a value's payload.
200pub fn value_size(value: &Value) -> usize {
201    match value {
202        Value::String(data) => data.len(),
203        Value::List(deque) => {
204            let element_bytes: usize = deque
205                .iter()
206                .map(|b| b.len() + VECDEQUE_ELEMENT_OVERHEAD)
207                .sum();
208            VECDEQUE_BASE_OVERHEAD + element_bytes
209        }
210        Value::SortedSet(ss) => ss.memory_usage(),
211        Value::Hash(map) => {
212            let entry_bytes: usize = map
213                .iter()
214                .map(|(k, v)| k.len() + v.len() + HASHMAP_ENTRY_OVERHEAD)
215                .sum();
216            HASHMAP_BASE_OVERHEAD + entry_bytes
217        }
218        Value::Set(set) => {
219            let member_bytes: usize = set.iter().map(|m| m.len() + HASHSET_MEMBER_OVERHEAD).sum();
220            HASHSET_BASE_OVERHEAD + member_bytes
221        }
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;

    // Helper: wraps a &str in a Value::String for concise test setup.
    fn string_val(s: &str) -> Value {
        Value::String(Bytes::from(s.to_string()))
    }

    #[test]
    fn new_tracker_is_empty() {
        let t = MemoryTracker::new();
        assert_eq!(t.used_bytes(), 0);
        assert_eq!(t.key_count(), 0);
    }

    #[test]
    fn add_increases_usage() {
        let mut t = MemoryTracker::new();
        let val = string_val("hello");
        t.add("key", &val);
        assert_eq!(t.key_count(), 1);
        assert_eq!(t.used_bytes(), entry_size("key", &val));
    }

    #[test]
    fn remove_decreases_usage() {
        let mut t = MemoryTracker::new();
        let val = string_val("data");
        t.add("k", &val);
        t.remove("k", &val);
        assert_eq!(t.used_bytes(), 0);
        assert_eq!(t.key_count(), 0);
    }

    #[test]
    fn replace_adjusts_usage() {
        let mut t = MemoryTracker::new();
        let old = string_val("short");
        let new = string_val("a much longer value here");
        t.add("k", &old);

        let before = t.used_bytes();
        t.replace("k", &old, &new);

        // replace must not touch the key count — only the byte total.
        assert_eq!(t.key_count(), 1);
        // new value is longer, so usage should increase
        assert!(t.used_bytes() > before);
        assert_eq!(t.used_bytes(), entry_size("k", &new),);
    }

    #[test]
    fn remove_saturates_at_zero() {
        let mut t = MemoryTracker::new();
        let val = string_val("x");
        // remove without add — should not underflow
        t.remove("k", &val);
        assert_eq!(t.used_bytes(), 0);
        assert_eq!(t.key_count(), 0);
    }

    #[test]
    fn entry_size_accounts_for_key_and_value() {
        let val = string_val("test");
        let size = entry_size("mykey", &val);
        // 5 (key) + 4 (value) + 96 (overhead)
        assert_eq!(size, 5 + 4 + ENTRY_OVERHEAD);
    }

    #[test]
    fn list_value_size() {
        let mut deque = std::collections::VecDeque::new();
        deque.push_back(Bytes::from("hello"));
        deque.push_back(Bytes::from("world"));
        let val = Value::List(deque);

        let size = value_size(&val);
        // base overhead + 2 elements (each: data len + element overhead)
        let expected = VECDEQUE_BASE_OVERHEAD
            + (5 + VECDEQUE_ELEMENT_OVERHEAD)
            + (5 + VECDEQUE_ELEMENT_OVERHEAD);
        assert_eq!(size, expected);
    }

    #[test]
    fn empty_list_value_size() {
        // An empty list still costs its container's base overhead.
        let val = Value::List(std::collections::VecDeque::new());
        assert_eq!(value_size(&val), VECDEQUE_BASE_OVERHEAD);
    }

    #[test]
    fn multiple_entries() {
        let mut t = MemoryTracker::new();
        let v1 = string_val("aaa");
        let v2 = string_val("bbbbb");
        t.add("k1", &v1);
        t.add("k2", &v2);

        assert_eq!(t.key_count(), 2);
        assert_eq!(
            t.used_bytes(),
            entry_size("k1", &v1) + entry_size("k2", &v2),
        );

        // Removing one entry leaves exactly the other's contribution.
        t.remove("k1", &v1);
        assert_eq!(t.key_count(), 1);
        assert_eq!(t.used_bytes(), entry_size("k2", &v2));
    }

    #[test]
    fn effective_limit_applies_margin() {
        // 1000 bytes configured → 900 effective at 90%
        assert_eq!(effective_limit(1000), 900);
    }

    #[test]
    fn effective_limit_rounds_down() {
        // 1001 * 90 / 100 = 900 (integer division truncates)
        assert_eq!(effective_limit(1001), 900);
    }

    #[test]
    fn effective_limit_zero() {
        assert_eq!(effective_limit(0), 0);
    }

    #[test]
    fn string_is_never_large() {
        // Even a 10 KB string is inline-droppable (Bytes::drop is O(1)).
        let val = Value::String(Bytes::from(vec![0u8; 10_000]));
        assert!(!is_large_value(&val));
    }

    #[test]
    fn small_list_is_not_large() {
        // Exactly LAZY_FREE_THRESHOLD elements: comparison is strict `>`.
        let mut d = std::collections::VecDeque::new();
        for _ in 0..LAZY_FREE_THRESHOLD {
            d.push_back(Bytes::from("x"));
        }
        assert!(!is_large_value(&Value::List(d)));
    }

    #[test]
    fn big_list_is_large() {
        // One element over the threshold crosses into "large".
        let mut d = std::collections::VecDeque::new();
        for _ in 0..=LAZY_FREE_THRESHOLD {
            d.push_back(Bytes::from("x"));
        }
        assert!(is_large_value(&Value::List(d)));
    }

    #[test]
    fn big_hash_is_large() {
        let mut m = std::collections::HashMap::new();
        for i in 0..=LAZY_FREE_THRESHOLD {
            m.insert(format!("f{i}"), Bytes::from("v"));
        }
        assert!(is_large_value(&Value::Hash(m)));
    }

    #[test]
    fn big_set_is_large() {
        let mut s = std::collections::HashSet::new();
        for i in 0..=LAZY_FREE_THRESHOLD {
            s.insert(format!("m{i}"));
        }
        assert!(is_large_value(&Value::Set(s)));
    }
}