kevy_store/small_set.rs
1//! `SmallSetData` — valkey-style inline-listpack encoding for tiny sets.
2//!
3//! v1.25 A.7 O5: mirror valkey's `OBJ_ENCODING_LISTPACK` for sets of size
4//! 1–N (`t_set.c::setTypeMaybeConvert`). valkey starts a fresh set as a
5//! 1-entry listpack inside one cache line; once cardinality grows past
6//! `set-max-listpack-entries` (128 default) OR a single member exceeds the
7//! per-entry size cap, it converts to `OBJ_ENCODING_HT`. kevy's analogue:
8//! [`SmallSetData`] for tiny sets, upgrade to [`crate::value::SetData`]
9//! (Swiss-table `KevySet<SmallBytes>`) on overflow.
10//!
11//! ## Layout
12//!
13//! Exactly 24 bytes, mirroring [`kevy_bytes::SmallBytes`] so the
14//! `Value::SmallSetInline(SmallSetData)` variant body matches the size
15//! of `Value::Str(SmallBytes)` and the
16//! `assert!(size_of::<Value>() <= 32)` in `value.rs:162` still holds:
17//!
18//! ```text
19//! offset: 0 1 23
20//! +----+----+----+----+----+ ... +-----+
21//! | n | u | buf[22] |
22//! +----+----+----+----+----+ ... +-----+
23//! ```
24//!
25//! - `n` (u8): member count (`0..=22`, capped well below to leave room).
26//! - `u` (u8): bytes used in `buf` (sum of `1 + len_i` over all members).
27//! - `buf` ([u8; 22]): packed `[len_i: u8][member_i: u8; len_i]` entries.
28//!
29//! Per-entry length prefix is one byte → a single member is at most 21
30//! bytes (need 1 byte for the length itself). For 20-byte
31//! `element:__rand_int__` (the redis-benchmark default SADD member shape),
32//! one entry consumes 21 bytes — fits with 1 spare. For shorter members,
33//! 4-5 fit comfortably.
34//!
35//! ## Upgrade trigger
36//!
37//! Insert returns [`AddResult::NoRoom`] when either (a) the new member's
38//! `1 + len` would overflow the 22-byte budget, or (b) the per-member
39//! length exceeds the 21-byte per-entry cap. The caller upgrades to
40//! `Value::Set(Arc<SetData>)` and re-inserts the new member there.
41//!
42//! Linear-scan `contains` over ≤ N members is faster than the
43//! hash-then-SIMD-probe path on a 16-slot Swiss table when N is small
44//! AND the data is one cache line — the same structural reason valkey's
45//! listpack beats `OBJ_ENCODING_HT` for N≤128.
46//!
47//! ## Future extension
48//!
49//! Hash / List / ZSet families want the same encoding switch. The
50//! pattern factored here:
51//! 1. Inline 24-byte packed-entries variant on `Value`.
52//! 2. Per-op `try_*` helper returns `AddResult { Added, AlreadyPresent,
53//! NoRoom }` so the caller knows when to upgrade.
54//! 3. `account_delta` accepts the per-member weight delta uniformly; the
55//! inline variant returns its own `inline_weight()` (zero heap) so the
56//! Store's `used_memory` accounting stays consistent across both
57//! encodings.
58//!
59//! The `Hash` analogue will need `[len_field][field][len_val][val]`
60//! tuples; List can use the same `[len][bytes]` shape; ZSet needs
61//! `[score:8][len][member]`. All three fit the 24-byte budget for the
62//! 1–3 member cases that dominate `redis-benchmark` default shapes.
63
64use kevy_bytes::SmallBytes;
65
66/// Inline packed set storage. 24 bytes total — see module docs for layout.
67#[derive(Clone)]
68pub struct SmallSetData {
69 /// Number of inline members (0..=22 cap, real ceiling is byte-budget).
70 count: u8,
71 /// Bytes used in `buf` so far (sum of `1 + member_len` per entry).
72 used: u8,
73 /// Packed `[len_i: u8][member_i; len_i]` entries, contiguous from
74 /// offset 0 up to `used`.
75 buf: [u8; SMALL_SET_BUF_CAP],
76}
77
78/// Byte budget for the inline packed entries area. Chosen so that
79/// `count(1) + used(1) + buf(22) = 24` bytes total, matching the
80/// `SmallBytes` body size and preserving `size_of::<Value>() <= 32`.
81pub(crate) const SMALL_SET_BUF_CAP: usize = 22;
82
83/// Per-member length cap: one byte is spent on the length prefix, so the
84/// member payload can be at most `SMALL_SET_BUF_CAP - 1` bytes. Members
85/// larger than this trigger an [`AddResult::NoRoom`] regardless of how
86/// empty the inline buffer is — the caller must upgrade to
87/// `Value::Set` to store them.
88pub(crate) const SMALL_SET_MEMBER_MAX: usize = SMALL_SET_BUF_CAP - 1;
89
90/// Per-set member count cap. The byte budget tends to hit first (a single
91/// 20-byte member takes 21 of 22 bytes), but a hard `count` cap keeps the
92/// linear scan deterministic and bounds the `u8` count field.
93pub(crate) const SMALL_SET_COUNT_MAX: usize = 8;
94
95/// Outcome of [`SmallSetData::try_add`].
96pub(crate) enum AddResult {
97 /// Member was new; count + used updated.
98 Added,
99 /// Member already present; no change.
100 AlreadyPresent,
101 /// Member doesn't fit (either too long or buffer full / count cap).
102 /// Caller must upgrade to `Value::Set` and re-insert.
103 NoRoom,
104}
105
106impl SmallSetData {
107 /// Build an empty inline set.
108 pub(crate) fn new() -> Self {
109 Self {
110 count: 0,
111 used: 0,
112 buf: [0; SMALL_SET_BUF_CAP],
113 }
114 }
115
116 /// Build an inline set holding one member, if it fits. Returns `None`
117 /// when the member exceeds [`SMALL_SET_MEMBER_MAX`] — the caller
118 /// should create a `Value::Set(Arc::default())` and insert the
119 /// member there instead.
120 pub(crate) fn with_one(member: &[u8]) -> Option<Self> {
121 if member.len() > SMALL_SET_MEMBER_MAX {
122 return None;
123 }
124 let mut s = Self::new();
125 s.buf[0] = member.len() as u8;
126 s.buf[1..1 + member.len()].copy_from_slice(member);
127 s.count = 1;
128 s.used = 1 + member.len() as u8;
129 Some(s)
130 }
131
132 /// Number of inline members.
133 pub fn len(&self) -> usize {
134 self.count as usize
135 }
136
137 /// Whether the inline set has no members.
138 pub fn is_empty(&self) -> bool {
139 self.count == 0
140 }
141
142 /// Linear scan for `member`. ≤22 bytes of packed entries fit in one
143 /// cache line; loop is unrolled by the optimiser at small counts.
144 pub fn contains(&self, member: &[u8]) -> bool {
145 self.iter_slices().any(|m| m == member)
146 }
147
148 /// Iterator over the packed entries as `&[u8]` slices. Owns nothing.
149 pub fn iter_slices(&self) -> SmallSetIter<'_> {
150 SmallSetIter { buf: &self.buf[..self.used as usize], cursor: 0 }
151 }
152
153 /// Alias for [`Self::iter_slices`] — matches the `iter` shape used by
154 /// other collection types in this crate.
155 pub fn iter(&self) -> SmallSetIter<'_> {
156 self.iter_slices()
157 }
158
159 /// Try to append `member`. See [`AddResult`].
160 pub(crate) fn try_add(&mut self, member: &[u8]) -> AddResult {
161 if self.contains(member) {
162 return AddResult::AlreadyPresent;
163 }
164 if member.len() > SMALL_SET_MEMBER_MAX {
165 return AddResult::NoRoom;
166 }
167 if self.count as usize >= SMALL_SET_COUNT_MAX {
168 return AddResult::NoRoom;
169 }
170 let need = 1 + member.len();
171 let new_used = self.used as usize + need;
172 if new_used > SMALL_SET_BUF_CAP {
173 return AddResult::NoRoom;
174 }
175 let off = self.used as usize;
176 self.buf[off] = member.len() as u8;
177 self.buf[off + 1..off + need].copy_from_slice(member);
178 self.used = new_used as u8;
179 self.count += 1;
180 AddResult::Added
181 }
182
183 /// Try to remove `member`. Returns whether it was present. On hit,
184 /// the trailing packed entries are shifted left by `1 + len` to
185 /// close the gap (deterministic O(used) memmove, fits in cache line).
186 pub(crate) fn try_remove(&mut self, member: &[u8]) -> bool {
187 let mut cursor = 0usize;
188 let used = self.used as usize;
189 while cursor < used {
190 let len = self.buf[cursor] as usize;
191 let start = cursor + 1;
192 let end = start + len;
193 if &self.buf[start..end] == member {
194 // Shift [end..used) → [cursor..)
195 self.buf.copy_within(end..used, cursor);
196 let shifted = used - end;
197 let new_used = cursor + shifted;
198 // Zero the freed tail (avoid leaking old member bytes
199 // into snapshots / accidental Debug prints).
200 self.buf[new_used..used].fill(0);
201 self.used = new_used as u8;
202 self.count -= 1;
203 return true;
204 }
205 cursor = end;
206 }
207 false
208 }
209}
210
211/// Iterator over [`SmallSetData`] members as `&[u8]` slices.
212pub struct SmallSetIter<'a> {
213 buf: &'a [u8],
214 cursor: usize,
215}
216
217impl<'a> Iterator for SmallSetIter<'a> {
218 type Item = &'a [u8];
219
220 fn next(&mut self) -> Option<&'a [u8]> {
221 if self.cursor >= self.buf.len() {
222 return None;
223 }
224 let len = self.buf[self.cursor] as usize;
225 let start = self.cursor + 1;
226 let end = start + len;
227 self.cursor = end;
228 Some(&self.buf[start..end])
229 }
230}
231
232/// Materialise the inline set as a heap-backed [`crate::value::SetData`].
233/// Used when an upgrade is forced by an oversized member or full buffer.
234pub(crate) fn promote(inline: &SmallSetData) -> crate::value::SetData {
235 let mut s = crate::value::SetData::with_capacity(inline.len().max(1));
236 for m in inline.iter_slices() {
237 s.insert(SmallBytes::from_slice(m));
238 }
239 s
240}
241
242#[cfg(test)]
243mod tests {
244 use super::*;
245
246 #[test]
247 fn size_is_24_bytes() {
248 // Mirrors SmallBytes' 24 B body so size_of::<Value>() <= 32 holds.
249 assert_eq!(std::mem::size_of::<SmallSetData>(), 24);
250 }
251
252 #[test]
253 fn empty_and_with_one() {
254 let s = SmallSetData::new();
255 assert_eq!(s.len(), 0);
256 assert!(!s.contains(b"foo"));
257
258 let s = SmallSetData::with_one(b"hi").unwrap();
259 assert_eq!(s.len(), 1);
260 assert!(s.contains(b"hi"));
261 assert!(!s.contains(b"hj"));
262 }
263
264 #[test]
265 fn member_too_long_for_with_one() {
266 let big = vec![b'x'; SMALL_SET_MEMBER_MAX + 1];
267 assert!(SmallSetData::with_one(&big).is_none());
268 }
269
270 #[test]
271 fn add_dedup_and_iter() {
272 let mut s = SmallSetData::new();
273 assert!(matches!(s.try_add(b"a"), AddResult::Added));
274 assert!(matches!(s.try_add(b"b"), AddResult::Added));
275 assert!(matches!(s.try_add(b"a"), AddResult::AlreadyPresent));
276 assert_eq!(s.len(), 2);
277 let v: Vec<&[u8]> = s.iter_slices().collect();
278 assert_eq!(v, vec![b"a".as_slice(), b"b".as_slice()]);
279 }
280
281 #[test]
282 fn full_buffer_returns_no_room() {
283 let mut s = SmallSetData::new();
284 // single 20-byte member uses 21 of 22 bytes; second won't fit.
285 let m1 = b"element:__rand_int__";
286 assert_eq!(m1.len(), 20);
287 assert!(matches!(s.try_add(m1), AddResult::Added));
288 assert!(matches!(s.try_add(b"x"), AddResult::NoRoom));
289 }
290
291 #[test]
292 fn member_too_long_returns_no_room() {
293 let mut s = SmallSetData::new();
294 let big = vec![b'x'; SMALL_SET_MEMBER_MAX + 1];
295 assert!(matches!(s.try_add(&big), AddResult::NoRoom));
296 assert_eq!(s.len(), 0);
297 }
298
299 #[test]
300 fn remove_middle_shifts_tail() {
301 let mut s = SmallSetData::new();
302 assert!(matches!(s.try_add(b"aa"), AddResult::Added));
303 assert!(matches!(s.try_add(b"bbb"), AddResult::Added));
304 assert!(matches!(s.try_add(b"cc"), AddResult::Added));
305 assert!(s.try_remove(b"bbb"));
306 assert_eq!(s.len(), 2);
307 assert!(s.contains(b"aa"));
308 assert!(!s.contains(b"bbb"));
309 assert!(s.contains(b"cc"));
310 let v: Vec<&[u8]> = s.iter_slices().collect();
311 assert_eq!(v, vec![b"aa".as_slice(), b"cc".as_slice()]);
312 }
313
314 #[test]
315 fn remove_absent_returns_false() {
316 let mut s = SmallSetData::new();
317 s.try_add(b"a");
318 assert!(!s.try_remove(b"zz"));
319 assert_eq!(s.len(), 1);
320 }
321
322 #[test]
323 fn count_cap_returns_no_room() {
324 let mut s = SmallSetData::new();
325 // 8 × 1-byte members fit in 16 bytes; 9th should hit the count cap
326 // before the byte cap.
327 for c in b"abcdefgh" {
328 assert!(matches!(s.try_add(&[*c]), AddResult::Added));
329 }
330 assert_eq!(s.len(), SMALL_SET_COUNT_MAX);
331 assert!(matches!(s.try_add(b"i"), AddResult::NoRoom));
332 }
333
334 #[test]
335 fn promote_preserves_members() {
336 let mut s = SmallSetData::new();
337 s.try_add(b"a");
338 s.try_add(b"bb");
339 s.try_add(b"ccc");
340 let promoted = promote(&s);
341 assert_eq!(promoted.len(), 3);
342 assert!(promoted.contains(b"a".as_slice()));
343 assert!(promoted.contains(b"bb".as_slice()));
344 assert!(promoted.contains(b"ccc".as_slice()));
345 }
346}