vecshard/
lib.rs

1/*!
2Split Vecs in O(1) time.
3
4You can split a [`Vec`] into two using [`Vec::split_off`](std::vec::Vec::split_off),
5but since most allocators can't just go and split up an allocation, this needs to allocate space
6for a second [`Vec`] and, even worse, copy the relevant elements over, which takes O(n) time.
7You could also split it into slices using `Vec::split_at` or
8`Vec::split_at_mut`, but this will not give you owned
9data you can move around or move out of at will.
10
11This crate provides a way to split a [`Vec`] into two owned [`VecShard`]s that
12behave similar to Vecs that takes constant time.
13The catch is that the [`VecShard`]s use reference counting to determine when the last of them is dropped.
14Only then is the memory from the original [`Vec`] deallocated.
15The individual items in the shards, however, are dropped as soon as the shard is dropped.
16
17This functionality is provided through an extension trait for [`Vec`], [`ShardExt`](crate::ShardExt).
18
19# Basic Example
20
21```
22use vecshard::ShardExt;
23
24let animals = vec!["penguin", "owl", "toucan", "turtle", "spider", "mosquitto"];
25
26// split the vec into 2 shards
27let (cool_animals, uncool_animals) = animals.split_inplace_at(4);
28
29// shards can be indexed as usual
30assert_eq!(cool_animals[3], "turtle");
31assert_eq!(uncool_animals[0], "spider");
32
33// ..including with a range as index
34assert_eq!(cool_animals[1..3], ["owl", "toucan"]);
35
36// they deref into slices, so you can use them as such:
37assert_eq!(cool_animals.len(), 4);
38assert!(uncool_animals.ends_with(&["mosquitto"]));
39
40// shards can also be split up again:
41let (cool_birds, cool_reptiles) = cool_animals.split_inplace_at(3);
42assert_eq!(*cool_birds, ["penguin", "owl", "toucan"]);
43assert_eq!(*cool_reptiles, ["turtle"]);
44```
45
46# Conversion
47
48Shards can be freely converted both [`From`](std::convert::From) and [`Into`](std::convert::Into) Vecs.
49Note that the latter may need to allocate if there are other shards also using the shards allocation.
50
51```
52# use vecshard::{VecShard, ShardExt};
53
54let vec = vec![1, 2, 3];
55let shard = VecShard::from(vec);
56let vec2 : Vec<_> = shard.into();
57```
58
59# Iteration
60
61To iterate over a [`VecShard`], you have several choices.
62[`VecShard<T>`](crate::VecShard) itself is a draining [`Iterator`] and returns owned `T` instances,
63removing them from its own storage.
64If you only need `&T` or `&mut T`, you can deref it to a slice and iterate over that.
65Finally, if you need an owning [`Iterator`] but do not want to drain the shard,
66you can [`clone`][std::clone::Clone::clone] the shard and iterate over that.
67
68```
69# use vecshard::{VecShard, ShardExt};
70let mut shard = VecShard::from(vec!['y', 'e', 'e', 't']);
71
72assert_eq!(Some('y'), shard.next());
73assert_eq!(Some('e'), shard.next());
74
75assert_eq!(*shard, ['e', 't']);
76```
77
78# Optional Features
79
80This crate has zero dependencies by default, but if you want to serialize and deserialize `VecShard`,
81you can enable the `serde` feature like this:
82
83```toml
84[dependencies.vecshard]
85optional = true
86version = "0.2.1"
87```
88
89[`VecShard`]: crate::VecShard
90*/
91
92use std::{
93    cmp::{Eq, PartialEq},
94    fmt,
95    hash::{Hash, Hasher},
96    iter::FusedIterator,
97    mem,
98    ops::{Deref, DerefMut, Index, IndexMut},
99    ptr,
100    slice::{self, SliceIndex},
101    sync::Arc,
102};
103
104pub mod error;
105use crate::error::{CantMerge, WouldAlloc, WouldMove};
106
107#[cfg(feature = "serde")]
108mod serde_impl;
109
110/// An extension trait for things that can be split into shards
111///
112/// For your convenience, this is implemented for both [`Vec`](std::vec::Vec) and
113/// [`VecShard`](crate::VecShard), so you can split recursively:
114///
115/// ```
116/// # use vecshard::ShardExt;
117/// let drinks = vec!["heineken", "jupiler", "turmbräu", "orange juice", "champagne"];
118///
119/// let (beers, other_drinks) = drinks.split_inplace_at(3);
120/// let (bad_beers, good_beers) = beers.split_inplace_at(2);
121///
122/// assert_eq!(*good_beers, ["turmbräu"]);
123/// ```
124pub trait ShardExt {
125    type Shard;
126
127    /// Split this array into two shards at the given index.
128    /// This is an O(1) operation, as it keeps the underlying storage.
129    /// In exchange, this means that the memory will not be reclaimed until
130    /// all existing shards using it are dropped.
131    fn split_inplace_at(self, at: usize) -> (Self::Shard, Self::Shard);
132}
133
134/// The raw guts of a Vec, used to free its allocation when all the shards are gone.
135struct VecDropper<T> {
136    ptr: *mut T,
137    capacity: usize,
138}
139
140impl<T> Drop for VecDropper<T> {
141    fn drop(&mut self) {
142        unsafe {
143            // Set len to 0 because we only want to free the memory.
144            // Dropping the elements themselves is taken care of by the shards.
145            mem::drop(Vec::from_raw_parts(self.ptr, 0, self.capacity));
146        }
147    }
148}
149
150/// A shard of a [`Vec<T>`](std::vec::Vec), can be used mostly like a Vec.
151///
152/// The major difference is that, when dropped, [`VecShard<T>`](crate::VecShard)
153/// will not immediately free its allocated memory.
154/// Instead, it will only drop all its items.
155/// The memory itself will be freed once all VecShards from the Vec are gone.
156pub struct VecShard<T> {
157    dropper: Arc<VecDropper<T>>,
158
159    data: *mut T,
160    len: usize,
161}
162
163// These are the same as for Vec<T>
164// Probably sound, since the only thing we share is the Arc
165unsafe impl<T: Send> Send for VecShard<T> {}
166unsafe impl<T: Sync> Sync for VecShard<T> {}
167
168impl<T> VecShard<T> {
169    fn into_raw_parts(self) -> (Arc<VecDropper<T>>, *mut T, usize) {
170        let dropper = unsafe { ptr::read(&self.dropper as *const Arc<VecDropper<T>>) };
171        let data = self.data;
172        let len = self.len;
173        mem::forget(self);
174        (dropper, data, len)
175    }
176
177    /// Try to merge the given shards without moving them around.
178    ///
179    /// This can only succeed if `left` and `right` were split off from the same Vec
180    /// and are directly adjacent to each other.
181    /// Furthermore, `right` needs to be at a higher address than left so the elements stay in the right order.
182    ///
183    /// Returns the merged shard on success and an `Err` otherwise.
184    ///
185    /// This function will always run in O(1) time.
186    pub fn merge_inplace(left: Self, right: Self) -> Result<Self, CantMerge<T, WouldMove>> {
187        use WouldMove::*;
188        // Are the shards even from the same Vec?
189        if !Arc::ptr_eq(&left.dropper, &right.dropper) {
190            Err(CantMerge {
191                reason: DifferentAllocations,
192                left,
193                right,
194            })
195        } else if unsafe { left.data.add(left.len) } == right.data {
196            let (ldropper, ldata, llen) = left.into_raw_parts();
197            let (rdropper, _, rlen) = right.into_raw_parts();
198            std::mem::drop(rdropper);
199            Ok(VecShard {
200                dropper: ldropper,
201                data: ldata,
202                len: llen + rlen,
203            })
204        } else if unsafe { right.data.add(right.len) } == left.data {
205            Err(CantMerge {
206                left,
207                right,
208                reason: WrongOrder,
209            })
210        } else {
211            Err(CantMerge {
212                reason: NotAdjacent,
213                left,
214                right,
215            })
216        }
217    }
218
219    /// Try to merge the given shards without allocating a new `Vec`.
220    ///
221    /// This function will always succeed if the passed shards can be merged in-place
222    /// or if they're the only two shards within a Vec.
223    ///
224    /// Returns the merged shard on success and an `Err` otherwise.
225    ///
226    /// This function may take time line in the length of the input shards, but it will never allocate.
227    pub fn merge_noalloc(left: Self, right: Self) -> Result<Self, CantMerge<T, WouldAlloc>> {
228        use WouldMove::*;
229
230        let cant_merge = match Self::merge_inplace(left, right) {
231            // happy path
232            Ok(shard) => return Ok(shard),
233            Err(err) => err,
234        };
235
236        if cant_merge.reason == DifferentAllocations {
237            return Err(CantMerge {
238                left: cant_merge.left,
239                right: cant_merge.right,
240                reason: WouldAlloc::DifferentAllocations,
241            });
242        }
243
244        let (ldropper, ldata, llen) = cant_merge.left.into_raw_parts();
245        let (rdropper, rdata, rlen) = cant_merge.right.into_raw_parts();
246
247        if cant_merge.reason == WrongOrder {
248            // semi-fast path: we only need to rotate
249            unsafe { slice::from_raw_parts_mut(rdata, llen + rlen).rotate_left(rlen) };
250            Ok(VecShard {
251                dropper: ldropper,
252                data: rdata,
253                len: llen + rlen,
254            })
255        } else if cant_merge.reason == NotAdjacent && Arc::strong_count(&ldropper) == 2 {
256            // There are only 2 references to the dropper left,
257            // and we're holding ldropper and rdropper, so we can freely re-use the allocation
258
259            let new_data = unsafe {
260                if rdata < ldata {
261                    // If right is actually on the left side, we have to shuffle things around
262                    if llen < rlen {
263                        //  ...  |---------- r ----------| ... |------ l ------|
264                        ptr::swap_nonoverlapping(rdata, ldata, llen);
265                        //  ...  |------ l ------|- ..r -| ... |----- r.. -----|
266                        ptr::copy(ldata, rdata.add(rlen), llen);
267                        //  ...  |------ l ------|- ..r -|----- r.. -----|  ...
268                        slice::from_raw_parts_mut(rdata.add(llen), rlen).rotate_left(rlen - llen);
269                    //      ...  |------ l ------|---------- r ----------|  ...
270                    } else {
271                        //  ...  |------ r ------| ... |---------- l ----------|
272                        ptr::swap_nonoverlapping(rdata, ldata, rlen);
273                        //  ...  |----- l.. -----| ... |------ r ------|- ..l -|
274                        slice::from_raw_parts_mut(ldata, llen).rotate_left(rlen);
275                        //  ...  |----- l.. -----| ... |- ..l -|------ r ------|
276                        ptr::copy(ldata, rdata.add(rlen), llen);
277                        //  ...  |---------- l ----------|------ r ------|  ...
278                    };
279                    rdata
280                } else {
281                    // Otherwise, just scootch it over
282                    //  ...  |---------- l ----------|    ...  |------ r ------|
283                    ptr::copy(rdata, ldata.add(llen), rlen);
284                    //  ...  |---------- l ----------|------ r ------|   ...
285                    ldata
286                }
287            };
288            Ok(VecShard {
289                data: new_data,
290                len: llen + rlen,
291                dropper: ldropper,
292            })
293        } else {
294            Err(CantMerge {
295                reason: WouldAlloc::OtherShardsLeft,
296                left: VecShard {
297                    dropper: ldropper,
298                    data: ldata,
299                    len: llen,
300                },
301                right: VecShard {
302                    dropper: rdropper,
303                    data: rdata,
304                    len: rlen,
305                },
306            })
307        }
308    }
309
310    /// Merge the given shards into a single shard.
311    ///
312    /// This will attempt an O(1) merge like `merge_inplace` but fall back to copying slices around
313    /// within their allocation and possibly allocating a new Vec if needed.
314    pub fn merge(left: Self, right: Self) -> Self {
315        Self::merge_noalloc(left, right).unwrap_or_else(|err| {
316            let (_ldropper, ldata, llen) = err.left.into_raw_parts();
317            let (_rdropper, rdata, rlen) = err.right.into_raw_parts();
318
319            // Give up and allocate
320            let mut vec = Vec::with_capacity(llen + rlen);
321            unsafe {
322                ptr::copy(ldata, vec.as_mut_ptr(), llen);
323                ptr::copy(rdata, vec.as_mut_ptr().add(llen), rlen);
324                vec.set_len(llen + rlen);
325            }
326            Self::from(vec)
327        })
328    }
329}
330
331impl<T> ShardExt for VecShard<T> {
332    type Shard = VecShard<T>;
333
334    fn split_inplace_at(mut self, at: usize) -> (Self::Shard, Self::Shard) {
335        assert!(at <= self.len);
336
337        let right = VecShard {
338            dropper: self.dropper.clone(),
339            data: unsafe { self.data.add(at) },
340            len: self.len - at,
341        };
342
343        // for the left shard, just cut ourselves down to size
344        self.len = at;
345
346        (self, right)
347    }
348}
349
350impl<T> Drop for VecShard<T> {
351    fn drop(&mut self) {
352        // Drop all the elements
353        // The VecDropper will take care of freeing the Vec itself, if needed
354        for o in 0..self.len {
355            unsafe { ptr::drop_in_place(self.data.add(o)) };
356        }
357    }
358}
359
360impl<T> Deref for VecShard<T> {
361    type Target = [T];
362
363    fn deref(&self) -> &[T] {
364        unsafe { slice::from_raw_parts(self.data, self.len) }
365    }
366}
367
368impl<T> DerefMut for VecShard<T> {
369    fn deref_mut(&mut self) -> &mut [T] {
370        unsafe { slice::from_raw_parts_mut(self.data, self.len) }
371    }
372}
373
374impl<T, I: SliceIndex<[T]>> Index<I> for VecShard<T> {
375    type Output = <I as slice::SliceIndex<[T]>>::Output;
376
377    fn index(&self, idx: I) -> &Self::Output {
378        &((**self)[idx])
379    }
380}
381
382impl<T, I: SliceIndex<[T]>> IndexMut<I> for VecShard<T> {
383    fn index_mut(&mut self, idx: I) -> &mut Self::Output {
384        &mut ((**self)[idx])
385    }
386}
387
388impl<T: PartialEq> PartialEq for VecShard<T> {
389    fn eq(&self, rhs: &Self) -> bool {
390        **self == **rhs
391    }
392}
393
394impl<T: Eq> Eq for VecShard<T> {}
395
396impl<T> Iterator for VecShard<T> {
397    type Item = T;
398
399    fn next(&mut self) -> Option<T> {
400        if self.len > 0 {
401            let res = unsafe { self.data.read() };
402            self.len -= 1;
403            self.data = unsafe { self.data.add(1) };
404            Some(res)
405        } else {
406            None
407        }
408    }
409
410    fn size_hint(&self) -> (usize, Option<usize>) {
411        (self.len, Some(self.len))
412    }
413}
414
415impl<T> ExactSizeIterator for VecShard<T> {
416    fn len(&self) -> usize {
417        self.len
418    }
419}
420
421impl<T> DoubleEndedIterator for VecShard<T> {
422    fn next_back(&mut self) -> Option<T> {
423        if self.len > 0 {
424            self.len -= 1;
425            Some(unsafe { self.data.add(self.len).read() })
426        } else {
427            None
428        }
429    }
430}
431
432impl<T> FusedIterator for VecShard<T> {}
433
434impl<T: Hash> Hash for VecShard<T> {
435    fn hash<H: Hasher>(&self, state: &mut H) {
436        Hash::hash(&**self, state)
437    }
438}
439
440impl<T> From<Vec<T>> for VecShard<T> {
441    fn from(mut v: Vec<T>) -> Self {
442        let res = VecShard {
443            dropper: Arc::new(VecDropper {
444                ptr: v.as_mut_ptr(),
445                capacity: v.capacity(),
446            }),
447            data: v.as_mut_ptr(),
448            len: v.len(),
449        };
450        mem::forget(v);
451        res
452    }
453}
454
455impl<T> Into<Vec<T>> for VecShard<T> {
456    fn into(self) -> Vec<T> {
457        // First, move everything out of self so we don't drop anything
458        let (dropper, data, len) = self.into_raw_parts();
459
460        // Optimization: if this shard is the only one left from the backing Vec, we re-use its allocation
461        if let Ok(dropper) = Arc::try_unwrap(dropper) {
462            // If our data is already at the start of the backing Vec, we don't need to move it
463            if data != dropper.ptr {
464                unsafe { ptr::copy(data, dropper.ptr, len) };
465            }
466            let v = unsafe { Vec::from_raw_parts(dropper.ptr, len, dropper.capacity) };
467            // Make sure we don't drop anything that the new Vec will need
468            mem::forget(dropper);
469            v
470        } else {
471            // Otherwise, just allocate a new Vec
472            let mut v = Vec::with_capacity(len);
473            unsafe {
474                ptr::copy_nonoverlapping(data, v.as_mut_ptr(), len);
475                v.set_len(len);
476            };
477            v
478        }
479    }
480}
481
482impl<T: Clone> Clone for VecShard<T> {
483    fn clone(&self) -> VecShard<T> {
484        // Not much we can do here, just make a new Vec
485        let mut vec = Vec::with_capacity(self.len);
486        vec.extend_from_slice(unsafe { slice::from_raw_parts(self.data, self.len) });
487        VecShard::from(vec)
488    }
489}
490
491impl<T: fmt::Debug> fmt::Debug for VecShard<T> {
492    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
493        write!(f, "{:?}", &**self)
494    }
495}
496
497impl<T> ShardExt for Vec<T> {
498    type Shard = VecShard<T>;
499
500    fn split_inplace_at(self, at: usize) -> (Self::Shard, Self::Shard) {
501        VecShard::from(self).split_inplace_at(at)
502    }
503}