Skip to main content

kevy_store/
small_set.rs

1//! `SmallSetData` — valkey-style inline-listpack encoding for tiny sets.
2//!
3//! v1.25 A.7 O5: mirror valkey's `OBJ_ENCODING_LISTPACK` for sets of size
4//! 1–N (`t_set.c::setTypeMaybeConvert`). valkey starts a fresh set as a
5//! 1-entry listpack inside one cache line; once cardinality grows past
6//! `set-max-listpack-entries` (128 default) OR a single member exceeds the
7//! per-entry size cap, it converts to `OBJ_ENCODING_HT`. kevy's analogue:
8//! [`SmallSetData`] for tiny sets, upgrade to [`crate::value::SetData`]
9//! (Swiss-table `KevySet<SmallBytes>`) on overflow.
10//!
11//! ## Layout
12//!
13//! Exactly 24 bytes, mirroring [`kevy_bytes::SmallBytes`] so the
14//! `Value::SmallSetInline(SmallSetData)` variant body matches the size
15//! of `Value::Str(SmallBytes)` and the
16//! `assert!(size_of::<Value>() <= 32)` in `value.rs:162` still holds:
17//!
18//! ```text
19//! offset: 0    1                                 23
20//!         +----+----+----+----+----+ ...     +-----+
21//!         | n  | u  |       buf[22]              |
22//!         +----+----+----+----+----+ ...     +-----+
23//! ```
24//!
25//! - `n` (u8): member count (`0..=22`, capped well below to leave room).
26//! - `u` (u8): bytes used in `buf` (sum of `1 + len_i` over all members).
27//! - `buf` ([u8; 22]): packed `[len_i: u8][member_i: u8; len_i]` entries.
28//!
29//! Per-entry length prefix is one byte → a single member is at most 21
30//! bytes (need 1 byte for the length itself). For 20-byte
31//! `element:__rand_int__` (the redis-benchmark default SADD member shape),
32//! one entry consumes 21 bytes — fits with 1 spare. For shorter members,
33//! 4-5 fit comfortably.
34//!
35//! ## Upgrade trigger
36//!
37//! Insert returns [`AddResult::NoRoom`] when either (a) the new member's
38//! `1 + len` would overflow the 22-byte budget, or (b) the per-member
39//! length exceeds the 21-byte per-entry cap. The caller upgrades to
40//! `Value::Set(Arc<SetData>)` and re-inserts the new member there.
41//!
42//! Linear-scan `contains` over ≤ N members is faster than the
43//! hash-then-SIMD-probe path on a 16-slot Swiss table when N is small
44//! AND the data is one cache line — the same structural reason valkey's
45//! listpack beats `OBJ_ENCODING_HT` for N≤128.
46//!
47//! ## Future extension
48//!
49//! Hash / List / ZSet families want the same encoding switch. The
50//! pattern factored here:
51//! 1. Inline 24-byte packed-entries variant on `Value`.
52//! 2. Per-op `try_*` helper returns `AddResult { Added, AlreadyPresent,
53//!    NoRoom }` so the caller knows when to upgrade.
54//! 3. `account_delta` accepts the per-member weight delta uniformly; the
55//!    inline variant returns its own `inline_weight()` (zero heap) so the
56//!    Store's `used_memory` accounting stays consistent across both
57//!    encodings.
58//!
59//! The `Hash` analogue will need `[len_field][field][len_val][val]`
60//! tuples; List can use the same `[len][bytes]` shape; ZSet needs
61//! `[score:8][len][member]`. All three fit the 24-byte budget for the
62//! 1–3 member cases that dominate `redis-benchmark` default shapes.
63
64use kevy_bytes::SmallBytes;
65
66/// Inline packed set storage. 24 bytes total — see module docs for layout.
67#[derive(Clone)]
68pub struct SmallSetData {
69    /// Number of inline members (0..=22 cap, real ceiling is byte-budget).
70    count: u8,
71    /// Bytes used in `buf` so far (sum of `1 + member_len` per entry).
72    used: u8,
73    /// Packed `[len_i: u8][member_i; len_i]` entries, contiguous from
74    /// offset 0 up to `used`.
75    buf: [u8; SMALL_SET_BUF_CAP],
76}
77
78/// Byte budget for the inline packed entries area. Chosen so that
79/// `count(1) + used(1) + buf(22) = 24` bytes total, matching the
80/// `SmallBytes` body size and preserving `size_of::<Value>() <= 32`.
81pub(crate) const SMALL_SET_BUF_CAP: usize = 22;
82
83/// Per-member length cap: one byte is spent on the length prefix, so the
84/// member payload can be at most `SMALL_SET_BUF_CAP - 1` bytes. Members
85/// larger than this trigger an [`AddResult::NoRoom`] regardless of how
86/// empty the inline buffer is — the caller must upgrade to
87/// `Value::Set` to store them.
88pub(crate) const SMALL_SET_MEMBER_MAX: usize = SMALL_SET_BUF_CAP - 1;
89
90/// Per-set member count cap. The byte budget tends to hit first (a single
91/// 20-byte member takes 21 of 22 bytes), but a hard `count` cap keeps the
92/// linear scan deterministic and bounds the `u8` count field.
93pub(crate) const SMALL_SET_COUNT_MAX: usize = 8;
94
95/// Outcome of [`SmallSetData::try_add`].
96pub(crate) enum AddResult {
97    /// Member was new; count + used updated.
98    Added,
99    /// Member already present; no change.
100    AlreadyPresent,
101    /// Member doesn't fit (either too long or buffer full / count cap).
102    /// Caller must upgrade to `Value::Set` and re-insert.
103    NoRoom,
104}
105
106impl SmallSetData {
107    /// Build an empty inline set.
108    pub(crate) fn new() -> Self {
109        Self {
110            count: 0,
111            used: 0,
112            buf: [0; SMALL_SET_BUF_CAP],
113        }
114    }
115
116    /// Build an inline set holding one member, if it fits. Returns `None`
117    /// when the member exceeds [`SMALL_SET_MEMBER_MAX`] — the caller
118    /// should create a `Value::Set(Arc::default())` and insert the
119    /// member there instead.
120    pub(crate) fn with_one(member: &[u8]) -> Option<Self> {
121        if member.len() > SMALL_SET_MEMBER_MAX {
122            return None;
123        }
124        let mut s = Self::new();
125        s.buf[0] = member.len() as u8;
126        s.buf[1..1 + member.len()].copy_from_slice(member);
127        s.count = 1;
128        s.used = 1 + member.len() as u8;
129        Some(s)
130    }
131
132    /// Number of inline members.
133    pub fn len(&self) -> usize {
134        self.count as usize
135    }
136
137    /// Whether the inline set has no members.
138    pub fn is_empty(&self) -> bool {
139        self.count == 0
140    }
141
142    /// Linear scan for `member`. ≤22 bytes of packed entries fit in one
143    /// cache line; loop is unrolled by the optimiser at small counts.
144    pub fn contains(&self, member: &[u8]) -> bool {
145        self.iter_slices().any(|m| m == member)
146    }
147
148    /// Iterator over the packed entries as `&[u8]` slices. Owns nothing.
149    pub fn iter_slices(&self) -> SmallSetIter<'_> {
150        SmallSetIter { buf: &self.buf[..self.used as usize], cursor: 0 }
151    }
152
153    /// Alias for [`Self::iter_slices`] — matches the `iter` shape used by
154    /// other collection types in this crate.
155    pub fn iter(&self) -> SmallSetIter<'_> {
156        self.iter_slices()
157    }
158
159    /// Try to append `member`. See [`AddResult`].
160    pub(crate) fn try_add(&mut self, member: &[u8]) -> AddResult {
161        if self.contains(member) {
162            return AddResult::AlreadyPresent;
163        }
164        if member.len() > SMALL_SET_MEMBER_MAX {
165            return AddResult::NoRoom;
166        }
167        if self.count as usize >= SMALL_SET_COUNT_MAX {
168            return AddResult::NoRoom;
169        }
170        let need = 1 + member.len();
171        let new_used = self.used as usize + need;
172        if new_used > SMALL_SET_BUF_CAP {
173            return AddResult::NoRoom;
174        }
175        let off = self.used as usize;
176        self.buf[off] = member.len() as u8;
177        self.buf[off + 1..off + need].copy_from_slice(member);
178        self.used = new_used as u8;
179        self.count += 1;
180        AddResult::Added
181    }
182
183    /// Try to remove `member`. Returns whether it was present. On hit,
184    /// the trailing packed entries are shifted left by `1 + len` to
185    /// close the gap (deterministic O(used) memmove, fits in cache line).
186    pub(crate) fn try_remove(&mut self, member: &[u8]) -> bool {
187        let mut cursor = 0usize;
188        let used = self.used as usize;
189        while cursor < used {
190            let len = self.buf[cursor] as usize;
191            let start = cursor + 1;
192            let end = start + len;
193            if &self.buf[start..end] == member {
194                // Shift [end..used) → [cursor..)
195                self.buf.copy_within(end..used, cursor);
196                let shifted = used - end;
197                let new_used = cursor + shifted;
198                // Zero the freed tail (avoid leaking old member bytes
199                // into snapshots / accidental Debug prints).
200                self.buf[new_used..used].fill(0);
201                self.used = new_used as u8;
202                self.count -= 1;
203                return true;
204            }
205            cursor = end;
206        }
207        false
208    }
209}
210
211/// Iterator over [`SmallSetData`] members as `&[u8]` slices.
212pub struct SmallSetIter<'a> {
213    buf: &'a [u8],
214    cursor: usize,
215}
216
217impl<'a> Iterator for SmallSetIter<'a> {
218    type Item = &'a [u8];
219
220    fn next(&mut self) -> Option<&'a [u8]> {
221        if self.cursor >= self.buf.len() {
222            return None;
223        }
224        let len = self.buf[self.cursor] as usize;
225        let start = self.cursor + 1;
226        let end = start + len;
227        self.cursor = end;
228        Some(&self.buf[start..end])
229    }
230}
231
232/// Materialise the inline set as a heap-backed [`crate::value::SetData`].
233/// Used when an upgrade is forced by an oversized member or full buffer.
234pub(crate) fn promote(inline: &SmallSetData) -> crate::value::SetData {
235    let mut s = crate::value::SetData::with_capacity(inline.len().max(1));
236    for m in inline.iter_slices() {
237        s.insert(SmallBytes::from_slice(m));
238    }
239    s
240}
241
242#[cfg(test)]
243mod tests {
244    use super::*;
245
246    #[test]
247    fn size_is_24_bytes() {
248        // Mirrors SmallBytes' 24 B body so size_of::<Value>() <= 32 holds.
249        assert_eq!(std::mem::size_of::<SmallSetData>(), 24);
250    }
251
252    #[test]
253    fn empty_and_with_one() {
254        let s = SmallSetData::new();
255        assert_eq!(s.len(), 0);
256        assert!(!s.contains(b"foo"));
257
258        let s = SmallSetData::with_one(b"hi").unwrap();
259        assert_eq!(s.len(), 1);
260        assert!(s.contains(b"hi"));
261        assert!(!s.contains(b"hj"));
262    }
263
264    #[test]
265    fn member_too_long_for_with_one() {
266        let big = vec![b'x'; SMALL_SET_MEMBER_MAX + 1];
267        assert!(SmallSetData::with_one(&big).is_none());
268    }
269
270    #[test]
271    fn add_dedup_and_iter() {
272        let mut s = SmallSetData::new();
273        assert!(matches!(s.try_add(b"a"), AddResult::Added));
274        assert!(matches!(s.try_add(b"b"), AddResult::Added));
275        assert!(matches!(s.try_add(b"a"), AddResult::AlreadyPresent));
276        assert_eq!(s.len(), 2);
277        let v: Vec<&[u8]> = s.iter_slices().collect();
278        assert_eq!(v, vec![b"a".as_slice(), b"b".as_slice()]);
279    }
280
281    #[test]
282    fn full_buffer_returns_no_room() {
283        let mut s = SmallSetData::new();
284        // single 20-byte member uses 21 of 22 bytes; second won't fit.
285        let m1 = b"element:__rand_int__";
286        assert_eq!(m1.len(), 20);
287        assert!(matches!(s.try_add(m1), AddResult::Added));
288        assert!(matches!(s.try_add(b"x"), AddResult::NoRoom));
289    }
290
291    #[test]
292    fn member_too_long_returns_no_room() {
293        let mut s = SmallSetData::new();
294        let big = vec![b'x'; SMALL_SET_MEMBER_MAX + 1];
295        assert!(matches!(s.try_add(&big), AddResult::NoRoom));
296        assert_eq!(s.len(), 0);
297    }
298
299    #[test]
300    fn remove_middle_shifts_tail() {
301        let mut s = SmallSetData::new();
302        assert!(matches!(s.try_add(b"aa"), AddResult::Added));
303        assert!(matches!(s.try_add(b"bbb"), AddResult::Added));
304        assert!(matches!(s.try_add(b"cc"), AddResult::Added));
305        assert!(s.try_remove(b"bbb"));
306        assert_eq!(s.len(), 2);
307        assert!(s.contains(b"aa"));
308        assert!(!s.contains(b"bbb"));
309        assert!(s.contains(b"cc"));
310        let v: Vec<&[u8]> = s.iter_slices().collect();
311        assert_eq!(v, vec![b"aa".as_slice(), b"cc".as_slice()]);
312    }
313
314    #[test]
315    fn remove_absent_returns_false() {
316        let mut s = SmallSetData::new();
317        s.try_add(b"a");
318        assert!(!s.try_remove(b"zz"));
319        assert_eq!(s.len(), 1);
320    }
321
322    #[test]
323    fn count_cap_returns_no_room() {
324        let mut s = SmallSetData::new();
325        // 8 × 1-byte members fit in 16 bytes; 9th should hit the count cap
326        // before the byte cap.
327        for c in b"abcdefgh" {
328            assert!(matches!(s.try_add(&[*c]), AddResult::Added));
329        }
330        assert_eq!(s.len(), SMALL_SET_COUNT_MAX);
331        assert!(matches!(s.try_add(b"i"), AddResult::NoRoom));
332    }
333
334    #[test]
335    fn promote_preserves_members() {
336        let mut s = SmallSetData::new();
337        s.try_add(b"a");
338        s.try_add(b"bb");
339        s.try_add(b"ccc");
340        let promoted = promote(&s);
341        assert_eq!(promoted.len(), 3);
342        assert!(promoted.contains(b"a".as_slice()));
343        assert!(promoted.contains(b"bb".as_slice()));
344        assert!(promoted.contains(b"ccc".as_slice()));
345    }
346}