Skip to main content

selene_graph/
chunked_vec.rs

1//! Chunked column vector primitive per spec 03 section 3.2.
2//!
3//! Values are grouped into 2048-element chunks. Frozen chunks live as
4//! `Arc<[T]>` and share cheaply across snapshots; the currently-filling chunk
5//! (the tail) is held as an `Arc<Vec<T>>` so that cloning the column shares
6//! the tail by refcount bump instead of deep-copying its elements. Mutations
7//! use `Arc::make_mut` throughout: a write clones only the affected frozen
8//! chunk or the tail, and only when a snapshot still holds it, so a clone
9//! taken before a mutation never observes the mutation (B1 tail COW).
10
11use std::marker::PhantomData;
12use std::sync::Arc;
13
14/// Number of column entries per chunk.
15pub const CHUNK_SIZE: usize = 2048;
16
17/// Column storage split into independently clone-on-write chunks.
18#[derive(Clone, Debug)]
19pub struct ChunkedVec<T> {
20    chunks: Vec<Arc<[T]>>,
21    tail: Arc<Vec<T>>,
22    len: usize,
23    _marker: PhantomData<T>,
24}
25
26impl<T: Clone> ChunkedVec<T> {
27    /// Construct an empty column.
28    #[must_use]
29    pub fn new() -> Self {
30        Self {
31            chunks: Vec::new(),
32            tail: Arc::new(Vec::new()),
33            len: 0,
34            _marker: PhantomData,
35        }
36    }
37
38    /// Construct an empty column with enough chunk slots for `capacity`.
39    #[must_use]
40    pub fn with_capacity(capacity: usize) -> Self {
41        Self {
42            chunks: Vec::with_capacity(capacity.div_ceil(CHUNK_SIZE)),
43            tail: Arc::new(Vec::new()),
44            len: 0,
45            _marker: PhantomData,
46        }
47    }
48
49    /// Number of entries in the column.
50    #[must_use]
51    pub fn len(&self) -> usize {
52        self.len
53    }
54
55    /// Return true when the column contains no entries.
56    #[must_use]
57    pub fn is_empty(&self) -> bool {
58        self.len == 0
59    }
60
61    /// Return the value at `index`, if it exists.
62    #[must_use]
63    pub fn get(&self, index: usize) -> Option<&T> {
64        if index >= self.len {
65            return None;
66        }
67        let (chunk_index, offset) = locate(index);
68        if chunk_index < self.chunks.len() {
69            self.chunks[chunk_index].get(offset)
70        } else {
71            self.tail.get(offset)
72        }
73    }
74
75    /// Append `value` to the column in amortized O(1) time.
76    ///
77    /// Pushes go into the tail buffer through `Arc::make_mut`: while the tail
78    /// Arc is unique this is a plain `Vec::push`; the first push after a clone
79    /// pays one tail clone (≤ `CHUNK_SIZE - 1` elements) and subsequent pushes
80    /// reuse the now-unique buffer. When the tail reaches `CHUNK_SIZE` it
81    /// freezes into an `Arc<[T]>` immediately and a fresh tail starts on the
82    /// next push. Cloning the column shares frozen chunks *and* the tail via
83    /// Arc refcount bumps — no element is copied at clone time.
84    pub fn push(&mut self, value: T) {
85        let tail = Arc::make_mut(&mut self.tail);
86        if tail.capacity() == 0 {
87            tail.reserve(CHUNK_SIZE);
88        }
89        tail.push(value);
90        self.len += 1;
91        if tail.len() == CHUNK_SIZE {
92            let frozen = std::mem::take(tail);
93            self.chunks.push(Arc::from(frozen));
94        }
95    }
96
97    /// Replace the value at `index`.
98    ///
99    /// # Panics
100    ///
101    /// Panics if `index` is out of bounds.
102    pub fn set(&mut self, index: usize, value: T) {
103        assert!(
104            index < self.len,
105            "ChunkedVec::set index {index} out of bounds for len {}",
106            self.len
107        );
108        let (chunk_index, offset) = locate(index);
109        if chunk_index < self.chunks.len() {
110            let chunk = Arc::make_mut(&mut self.chunks[chunk_index]);
111            chunk[offset] = value;
112        } else {
113            Arc::make_mut(&mut self.tail)[offset] = value;
114        }
115    }
116
117    /// Iterate all values in insertion order.
118    pub fn iter(&self) -> impl Iterator<Item = &T> {
119        self.chunks
120            .iter()
121            .flat_map(|chunk| chunk.iter())
122            .chain(self.tail.iter())
123    }
124
125    #[cfg(test)]
126    pub(crate) fn chunk_count(&self) -> usize {
127        self.chunks.len() + if self.tail.is_empty() { 0 } else { 1 }
128    }
129
130    #[cfg(test)]
131    pub(crate) fn chunk_capacity(&self) -> usize {
132        self.chunks.capacity() + 1
133    }
134
135    #[cfg(test)]
136    pub(crate) fn chunk_arc(&self, index: usize) -> Arc<[T]> {
137        if index < self.chunks.len() {
138            Arc::clone(&self.chunks[index])
139        } else {
140            Arc::from(self.tail.as_slice())
141        }
142    }
143
144    #[cfg(test)]
145    pub(crate) fn slow_equals(&self, other: &Self) -> bool
146    where
147        T: PartialEq,
148    {
149        self.iter().eq(other.iter())
150    }
151}
152
153impl<T: Clone> Default for ChunkedVec<T> {
154    fn default() -> Self {
155        Self::new()
156    }
157}
158
159fn locate(index: usize) -> (usize, usize) {
160    (index / CHUNK_SIZE, index % CHUNK_SIZE)
161}
162
163#[cfg(test)]
164mod tests {
165    use proptest::prelude::*;
166
167    use super::*;
168
169    #[test]
170    fn new_is_empty() {
171        let vec = ChunkedVec::<u64>::new();
172        assert_eq!(vec.len(), 0);
173        assert!(vec.is_empty());
174        assert_eq!(vec.chunk_count(), 0);
175    }
176
177    #[test]
178    fn push_grows_and_get_returns_pushed_values() {
179        let mut vec = ChunkedVec::new();
180        for value in 0..5000 {
181            vec.push(value);
182        }
183        assert_eq!(vec.len(), 5000);
184        for value in 0..5000 {
185            assert_eq!(vec.get(value), Some(&value));
186        }
187        // 5000 = 2 frozen full chunks (4096) + 904 in tail.
188        assert_eq!(vec.chunk_count(), 3);
189    }
190
191    #[test]
192    fn push_does_not_clone_tail_per_call() {
193        // Pushing into a non-full tail must NOT touch frozen chunks' Arcs.
194        let mut vec = ChunkedVec::new();
195        for value in 0..CHUNK_SIZE {
196            vec.push(value);
197        }
198        // First chunk just froze; tail is empty.
199        assert_eq!(vec.chunks.len(), 1);
200        let first_chunk_handle = Arc::clone(&vec.chunks[0]);
201        // Push 100 more into the (now-fresh) tail; the frozen chunk's Arc
202        // strong count must stay 2 (vec.chunks[0] + first_chunk_handle).
203        for value in 0..100 {
204            vec.push(CHUNK_SIZE + value);
205        }
206        assert_eq!(Arc::strong_count(&first_chunk_handle), 2);
207        assert_eq!(vec.len(), CHUNK_SIZE + 100);
208    }
209
210    #[test]
211    fn set_replaces_value_at_index() {
212        let mut vec = ChunkedVec::new();
213        vec.push(1);
214        vec.push(2);
215        vec.set(1, 9);
216        assert_eq!(vec.get(1), Some(&9));
217    }
218
219    #[test]
220    fn set_only_clones_affected_chunk() {
221        let mut original = ChunkedVec::new();
222        for value in 0..4096 {
223            original.push(value);
224        }
225        // Two full frozen chunks, empty tail.
226        let mut cloned = original.clone();
227        assert!(original.slow_equals(&cloned));
228        let original_chunk_0 = original.chunk_arc(0);
229        let original_chunk_1 = original.chunk_arc(1);
230        cloned.set(CHUNK_SIZE + 1, 99);
231        assert!(!original.slow_equals(&cloned));
232        assert_eq!(Arc::strong_count(&original_chunk_0), 3);
233        assert_eq!(Arc::strong_count(&original_chunk_1), 2);
234        assert_eq!(original.get(CHUNK_SIZE + 1), Some(&(CHUNK_SIZE + 1)));
235        assert_eq!(cloned.get(CHUNK_SIZE + 1), Some(&99));
236    }
237
238    #[test]
239    fn set_in_tail_does_not_touch_frozen_chunks() {
240        let mut vec = ChunkedVec::new();
241        for value in 0..CHUNK_SIZE + 50 {
242            vec.push(value);
243        }
244        let frozen_handle = Arc::clone(&vec.chunks[0]);
245        vec.set(CHUNK_SIZE + 10, 999);
246        // The set targeted the tail; frozen chunk's strong count stays 2.
247        assert_eq!(Arc::strong_count(&frozen_handle), 2);
248        assert_eq!(vec.get(CHUNK_SIZE + 10), Some(&999));
249    }
250
251    #[test]
252    fn iter_yields_all_values_in_order() {
253        let mut vec = ChunkedVec::new();
254        for value in 0..4096 {
255            vec.push(value);
256        }
257        assert_eq!(
258            vec.iter().copied().collect::<Vec<_>>(),
259            (0..4096).collect::<Vec<_>>()
260        );
261    }
262
263    #[test]
264    fn iter_includes_tail() {
265        let mut vec = ChunkedVec::new();
266        for value in 0..CHUNK_SIZE + 5 {
267            vec.push(value);
268        }
269        assert_eq!(
270            vec.iter().copied().collect::<Vec<_>>(),
271            (0..CHUNK_SIZE + 5).collect::<Vec<_>>()
272        );
273    }
274
275    #[test]
276    fn with_capacity_does_not_overallocate_entries() {
277        let vec = ChunkedVec::<u64>::with_capacity(CHUNK_SIZE * 3 + 1);
278        assert_eq!(vec.len(), 0);
279        assert_eq!(vec.chunk_count(), 0);
280        assert!(vec.chunk_capacity() >= 4);
281    }
282
283    #[test]
284    fn get_out_of_bounds_returns_none() {
285        let mut vec = ChunkedVec::new();
286        vec.push(1);
287        assert_eq!(vec.get(1), None);
288    }
289
290    #[test]
291    #[should_panic(expected = "ChunkedVec::set index 1 out of bounds for len 1")]
292    fn set_out_of_bounds_panics_clearly() {
293        let mut vec = ChunkedVec::new();
294        vec.push(1);
295        vec.set(1, 2);
296    }
297
298    #[test]
299    fn clone_shares_tail_without_copying() {
300        // B1: cloning the column must share the tail allocation by refcount
301        // bump — read-only clones never duplicate tail elements.
302        let mut original = ChunkedVec::new();
303        for value in 0..100 {
304            original.push(value);
305        }
306        let cloned = original.clone();
307        assert!(Arc::ptr_eq(&original.tail, &cloned.tail));
308        assert_eq!(Arc::strong_count(&original.tail), 2);
309        assert!(original.slow_equals(&cloned));
310    }
311
312    #[test]
313    fn clone_then_push_isolates_original_snapshot() {
314        // B1: a push on the clone must COW the tail; the original (the
315        // "snapshot") keeps its pre-mutation contents and its own tail Arc.
316        let mut original = ChunkedVec::new();
317        for value in 0..10 {
318            original.push(value);
319        }
320        let snapshot_tail = Arc::clone(&original.tail);
321        let mut cloned = original.clone();
322        cloned.push(999);
323        // Original snapshot unchanged.
324        assert_eq!(original.len(), 10);
325        assert_eq!(original.get(10), None);
326        for value in 0..10 {
327            assert_eq!(original.get(value), Some(&value));
328        }
329        // Clone diverged onto its own tail allocation.
330        assert_eq!(cloned.get(10), Some(&999));
331        assert!(!Arc::ptr_eq(&original.tail, &cloned.tail));
332        // snapshot_tail + original.tail — the clone no longer shares it.
333        assert_eq!(Arc::strong_count(&snapshot_tail), 2);
334    }
335
336    #[test]
337    fn clone_then_set_in_tail_isolates_original_snapshot() {
338        let mut original = ChunkedVec::new();
339        for value in 0..10 {
340            original.push(value);
341        }
342        let mut cloned = original.clone();
343        cloned.set(3, 777);
344        assert_eq!(original.get(3), Some(&3));
345        assert_eq!(cloned.get(3), Some(&777));
346        assert!(!Arc::ptr_eq(&original.tail, &cloned.tail));
347    }
348
349    #[test]
350    fn first_write_makes_tail_unique_then_reuses_buffer() {
351        // The first push after a clone pays the one tail COW; subsequent
352        // pushes mutate the now-unique buffer in place (no further clones).
353        let mut original = ChunkedVec::new();
354        for value in 0..10 {
355            original.push(value);
356        }
357        let mut cloned = original.clone();
358        cloned.push(100);
359        // Compare raw Arc allocation pointers — holding a probe Arc clone
360        // would itself force `make_mut` to clone on the next write.
361        let unique_tail_ptr = Arc::as_ptr(&cloned.tail);
362        cloned.push(101);
363        cloned.set(0, 42);
364        // Still the same (unique) allocation: no further COW clones happened.
365        assert_eq!(Arc::as_ptr(&cloned.tail), unique_tail_ptr);
366        assert_eq!(Arc::strong_count(&cloned.tail), 1);
367        assert_eq!(cloned.get(0), Some(&42));
368        assert_eq!(original.get(0), Some(&0));
369    }
370
371    #[test]
372    fn freeze_under_share_preserves_both_columns() {
373        // Fill a clone up to the CHUNK_SIZE freeze boundary while the original
374        // still holds the pre-freeze tail Arc. The freeze must COW first, so
375        // the original keeps its short tail and the clone freezes correctly.
376        let mut original = ChunkedVec::new();
377        for value in 0..CHUNK_SIZE - 1 {
378            original.push(value);
379        }
380        let mut cloned = original.clone();
381        cloned.push(CHUNK_SIZE - 1); // triggers freeze in the clone
382        for value in CHUNK_SIZE..CHUNK_SIZE + 5 {
383            cloned.push(value);
384        }
385        // Original: one (unfrozen) tail of CHUNK_SIZE - 1 entries.
386        assert_eq!(original.len(), CHUNK_SIZE - 1);
387        assert_eq!(original.chunks.len(), 0);
388        assert_eq!(original.tail.len(), CHUNK_SIZE - 1);
389        for value in 0..CHUNK_SIZE - 1 {
390            assert_eq!(original.get(value), Some(&value));
391        }
392        // Clone: one frozen chunk plus a 5-entry fresh tail.
393        assert_eq!(cloned.len(), CHUNK_SIZE + 5);
394        assert_eq!(cloned.chunks.len(), 1);
395        assert_eq!(cloned.tail.len(), 5);
396        for value in 0..CHUNK_SIZE + 5 {
397            assert_eq!(cloned.get(value), Some(&value));
398        }
399    }
400
401    proptest! {
402        #[test]
403        fn random_push_set_sequence_preserves_latest_values(ops in proptest::collection::vec((any::<bool>(), 0_usize..128, any::<u16>()), 1..256)) {
404            let mut vec = ChunkedVec::new();
405            let mut expected = Vec::new();
406            for (set_existing, index, value) in ops {
407                if set_existing && !expected.is_empty() {
408                    let idx = index % expected.len();
409                    vec.set(idx, value);
410                    expected[idx] = value;
411                } else {
412                    vec.push(value);
413                    expected.push(value);
414                }
415                prop_assert_eq!(vec.len(), expected.len());
416                for (idx, expected_value) in expected.iter().enumerate() {
417                    prop_assert_eq!(vec.get(idx), Some(expected_value));
418                }
419            }
420        }
421
422        #[test]
423        fn clone_then_mutate_never_disturbs_snapshot(
424            seed_ops in proptest::collection::vec((any::<bool>(), 0_usize..4096, any::<u16>()), 1..512),
425            post_ops in proptest::collection::vec((any::<bool>(), 0_usize..4096, any::<u16>()), 1..512),
426        ) {
427            // B1 snapshot isolation: a clone taken at an arbitrary fill point
428            // (including straddling chunk freezes) must never observe pushes
429            // or sets applied to the live column afterwards.
430            let mut live = ChunkedVec::new();
431            let mut model = Vec::new();
432            for (set_existing, index, value) in seed_ops {
433                if set_existing && !model.is_empty() {
434                    let idx = index % model.len();
435                    live.set(idx, value);
436                    model[idx] = value;
437                } else {
438                    live.push(value);
439                    model.push(value);
440                }
441            }
442            let snapshot = live.clone();
443            let snapshot_model = model.clone();
444            for (set_existing, index, value) in post_ops {
445                if set_existing && !model.is_empty() {
446                    let idx = index % model.len();
447                    live.set(idx, value);
448                    model[idx] = value;
449                } else {
450                    live.push(value);
451                    model.push(value);
452                }
453            }
454            // Snapshot still matches the pre-clone model exactly.
455            prop_assert_eq!(snapshot.len(), snapshot_model.len());
456            for (idx, expected_value) in snapshot_model.iter().enumerate() {
457                prop_assert_eq!(snapshot.get(idx), Some(expected_value));
458            }
459            prop_assert_eq!(snapshot.get(snapshot_model.len()), None);
460            // Live column matches the post-mutation model.
461            prop_assert_eq!(live.len(), model.len());
462            for (idx, expected_value) in model.iter().enumerate() {
463                prop_assert_eq!(live.get(idx), Some(expected_value));
464            }
465        }
466    }
467}