// vecshard/lib.rs
/*!
Split Vecs in O(1) time.

You can split a [`Vec`] into two using [`Vec::split_off`](std::vec::Vec::split_off),
but since most allocators can't just go and split up an allocation, this needs to allocate space
for a second [`Vec`] and, even worse, copy the relevant elements over, which takes O(n) time.
You could also split it into slices using `Vec::split_at` or
`Vec::split_at_mut`, but this will not give you owned
data you can move around or move out of at will.

This crate provides a way to split a [`Vec`] in constant time into two owned
[`VecShard`]s that behave similarly to Vecs.
The catch is that the [`VecShard`]s use reference counting to determine when the last of them is dropped.
Only then is the memory from the original [`Vec`] deallocated.
The individual items in the shards, however, are dropped as soon as the shard is dropped.

This functionality is provided through an extension trait for [`Vec`], [`ShardExt`](crate::ShardExt).

# Basic Example

```
use vecshard::ShardExt;

let animals = vec!["penguin", "owl", "toucan", "turtle", "spider", "mosquitto"];

// split the vec into 2 shards
let (cool_animals, uncool_animals) = animals.split_inplace_at(4);

// shards can be indexed as usual
assert_eq!(cool_animals[3], "turtle");
assert_eq!(uncool_animals[0], "spider");

// ..including with a range as index
assert_eq!(cool_animals[1..3], ["owl", "toucan"]);

// they deref into slices, so you can use them as such:
assert_eq!(cool_animals.len(), 4);
assert!(uncool_animals.ends_with(&["mosquitto"]));

// shards can also be split up again:
let (cool_birds, cool_reptiles) = cool_animals.split_inplace_at(3);
assert_eq!(*cool_birds, ["penguin", "owl", "toucan"]);
assert_eq!(*cool_reptiles, ["turtle"]);
```

# Conversion

Shards can be freely converted both [`From`](std::convert::From) and [`Into`](std::convert::Into) Vecs.
Note that the latter may need to allocate if other shards are still using the shard's allocation.

```
# use vecshard::{VecShard, ShardExt};

let vec = vec![1, 2, 3];
let shard = VecShard::from(vec);
let vec2 : Vec<_> = shard.into();
```

# Iteration

To iterate over a [`VecShard`], you have several choices.
[`VecShard<T>`](crate::VecShard) itself is a draining [`Iterator`] and returns owned `T` instances,
removing them from its own storage.
If you only need `&T` or `&mut T`, you can deref it to a slice and iterate over that.
Finally, if you need an owning [`Iterator`] but do not want to drain the shard,
you can [`clone`][std::clone::Clone::clone] the shard and iterate over that.

```
# use vecshard::{VecShard, ShardExt};
let mut shard = VecShard::from(vec!['y', 'e', 'e', 't']);

assert_eq!(Some('y'), shard.next());
assert_eq!(Some('e'), shard.next());

assert_eq!(*shard, ['e', 't']);
```

# Optional Features

This crate has zero dependencies by default, but if you want to serialize and deserialize `VecShard`,
you can enable the `serde` feature like this:

```toml
[dependencies.vecshard]
features = ["serde"]
version = "0.2.1"
```

[`VecShard`]: crate::VecShard
*/

use std::{
    cmp::{Eq, PartialEq},
    fmt,
    hash::{Hash, Hasher},
    iter::FusedIterator,
    mem,
    ops::{Deref, DerefMut, Index, IndexMut},
    ptr,
    slice::{self, SliceIndex},
    sync::Arc,
};

pub mod error;
use crate::error::{CantMerge, WouldAlloc, WouldMove};

#[cfg(feature = "serde")]
mod serde_impl;

/// An extension trait for things that can be split into shards
///
/// For your convenience, this is implemented for both [`Vec`](std::vec::Vec) and
/// [`VecShard`](crate::VecShard), so you can split recursively:
///
/// ```
/// # use vecshard::ShardExt;
/// let drinks = vec!["heineken", "jupiler", "turmbräu", "orange juice", "champagne"];
///
/// let (beers, other_drinks) = drinks.split_inplace_at(3);
/// let (bad_beers, good_beers) = beers.split_inplace_at(2);
///
/// assert_eq!(*good_beers, ["turmbräu"]);
/// ```
pub trait ShardExt {
    /// The owned shard type produced by splitting.
    type Shard;

    /// Split this array into two shards at the given index.
    /// This is an O(1) operation, as it keeps the underlying storage.
    /// In exchange, this means that the memory will not be reclaimed until
    /// all existing shards using it are dropped.
    fn split_inplace_at(self, at: usize) -> (Self::Shard, Self::Shard);
}

/// The raw guts of a Vec, used to free its allocation when all the shards are gone.
struct VecDropper<T> {
    // Start of the original Vec's buffer (not of any individual shard).
    ptr: *mut T,
    // Capacity the original Vec was allocated with.
    capacity: usize,
}

impl<T> Drop for VecDropper<T> {
    fn drop(&mut self) {
        // SAFETY: `ptr`/`capacity` came from a real Vec, and this runs only
        // when the last Arc handle is gone, so nobody else uses the buffer.
        unsafe {
            // Set len to 0 because we only want to free the memory.
            // Dropping the elements themselves is taken care of by the shards.
            mem::drop(Vec::from_raw_parts(self.ptr, 0, self.capacity));
        }
    }
}

/// A shard of a [`Vec<T>`](std::vec::Vec), can be used mostly like a Vec.
///
/// The major difference is that, when dropped, [`VecShard<T>`](crate::VecShard)
/// will not immediately free its allocated memory.
/// Instead, it will only drop all its items.
/// The memory itself will be freed once all VecShards from the Vec are gone.
pub struct VecShard<T> {
    // Shared owner of the original Vec's allocation; frees it when the
    // last shard referencing it is dropped.
    dropper: Arc<VecDropper<T>>,

    // First element of this shard, somewhere inside the shared allocation.
    data: *mut T,
    // Number of elements belonging to this shard.
    len: usize,
}

// These are the same as for Vec<T>
// Probably sound, since the only thing we share is the Arc; the raw `data`
// pointer refers to elements this shard exclusively owns.
// NOTE(review): soundness rests on shards never aliasing each other's
// element ranges — confirm against split/merge invariants.
unsafe impl<T: Send> Send for VecShard<T> {}
unsafe impl<T: Sync> Sync for VecShard<T> {}

impl<T> VecShard<T> {
    /// Decompose the shard into (dropper, data, len) without running its
    /// `Drop` impl, so the elements are not dropped. The caller takes over
    /// responsibility for them; the Arc keeps the allocation alive.
    fn into_raw_parts(self) -> (Arc<VecDropper<T>>, *mut T, usize) {
        // SAFETY: `self` is forgotten immediately below, so the Arc is moved
        // out (not duplicated) and VecShard::drop never runs.
        let dropper = unsafe { ptr::read(&self.dropper as *const Arc<VecDropper<T>>) };
        let data = self.data;
        let len = self.len;
        mem::forget(self);
        (dropper, data, len)
    }

    /// Try to merge the given shards without moving them around.
    ///
    /// This can only succeed if `left` and `right` were split off from the same Vec
    /// and are directly adjacent to each other.
    /// Furthermore, `right` needs to be at a higher address than left so the elements stay in the right order.
    ///
    /// Returns the merged shard on success and an `Err` otherwise.
    ///
    /// This function will always run in O(1) time.
    pub fn merge_inplace(left: Self, right: Self) -> Result<Self, CantMerge<T, WouldMove>> {
        use WouldMove::*;
        // Are the shards even from the same Vec?
        if !Arc::ptr_eq(&left.dropper, &right.dropper) {
            Err(CantMerge {
                reason: DifferentAllocations,
                left,
                right,
            })
        } else if unsafe { left.data.add(left.len) } == right.data {
            // `left` ends exactly where `right` begins: widen `left` over
            // both ranges and release the now-redundant dropper handle.
            let (ldropper, ldata, llen) = left.into_raw_parts();
            let (rdropper, _, rlen) = right.into_raw_parts();
            std::mem::drop(rdropper);
            Ok(VecShard {
                dropper: ldropper,
                data: ldata,
                len: llen + rlen,
            })
        } else if unsafe { right.data.add(right.len) } == left.data {
            // Adjacent, but `right` sits at the lower address, so an
            // in-place merge would reverse the element order.
            Err(CantMerge {
                left,
                right,
                reason: WrongOrder,
            })
        } else {
            Err(CantMerge {
                reason: NotAdjacent,
                left,
                right,
            })
        }
    }

    /// Try to merge the given shards without allocating a new `Vec`.
    ///
    /// This function will always succeed if the passed shards can be merged in-place
    /// or if they're the only two shards within a Vec.
    ///
    /// Returns the merged shard on success and an `Err` otherwise.
    ///
    /// This function may take time linear in the length of the input shards, but it will never allocate.
    pub fn merge_noalloc(left: Self, right: Self) -> Result<Self, CantMerge<T, WouldAlloc>> {
        use WouldMove::*;

        let cant_merge = match Self::merge_inplace(left, right) {
            // happy path: merged without moving anything
            Ok(shard) => return Ok(shard),
            Err(err) => err,
        };

        // Different allocations can never be merged without allocating.
        if cant_merge.reason == DifferentAllocations {
            return Err(CantMerge {
                left: cant_merge.left,
                right: cant_merge.right,
                reason: WouldAlloc::DifferentAllocations,
            });
        }

        // From here on, both shards share one allocation. Take them apart;
        // the two dropper handles keep the allocation alive.
        let (ldropper, ldata, llen) = cant_merge.left.into_raw_parts();
        let (rdropper, rdata, rlen) = cant_merge.right.into_raw_parts();

        if cant_merge.reason == WrongOrder {
            // semi-fast path: adjacent but swapped, we only need to rotate
            // SAFETY: rdata..rdata+(llen+rlen) covers exactly [right|left],
            // all initialized elements owned by the two shards.
            unsafe { slice::from_raw_parts_mut(rdata, llen + rlen).rotate_left(rlen) };
            Ok(VecShard {
                dropper: ldropper,
                data: rdata,
                len: llen + rlen,
            })
        } else if cant_merge.reason == NotAdjacent && Arc::strong_count(&ldropper) == 2 {
            // There are only 2 references to the dropper left,
            // and we're holding ldropper and rdropper, so we can freely re-use the allocation

            // SAFETY: both ranges live in the same allocation and we hold
            // its only handles, so we may rearrange the elements freely.
            let new_data = unsafe {
                if rdata < ldata {
                    // If right is actually on the left side, we have to shuffle things around
                    if llen < rlen {
                        // ... |---------- r ----------| ... |------ l ------|
                        ptr::swap_nonoverlapping(rdata, ldata, llen);
                        // ... |------ l ------|- ..r -| ... |----- r.. -----|
                        ptr::copy(ldata, rdata.add(rlen), llen);
                        // ... |------ l ------|- ..r -|----- r.. -----| ...
                        slice::from_raw_parts_mut(rdata.add(llen), rlen).rotate_left(rlen - llen);
                        // ... |------ l ------|---------- r ----------| ...
                    } else {
                        // ... |------ r ------| ... |---------- l ----------|
                        ptr::swap_nonoverlapping(rdata, ldata, rlen);
                        // ... |----- l.. -----| ... |------ r ------|- ..l -|
                        slice::from_raw_parts_mut(ldata, llen).rotate_left(rlen);
                        // ... |----- l.. -----| ... |- ..l -|------ r ------|
                        ptr::copy(ldata, rdata.add(rlen), llen);
                        // ... |---------- l ----------|------ r ------| ...
                    };
                    rdata
                } else {
                    // Otherwise, just scootch it over
                    // ... |---------- l ----------| ... |------ r ------|
                    ptr::copy(rdata, ldata.add(llen), rlen);
                    // ... |---------- l ----------|------ r ------| ...
                    ldata
                }
            };
            Ok(VecShard {
                data: new_data,
                len: llen + rlen,
                dropper: ldropper,
            })
        } else {
            // Other shards still point into this allocation; rebuild the
            // inputs from their raw parts and hand them back untouched.
            Err(CantMerge {
                reason: WouldAlloc::OtherShardsLeft,
                left: VecShard {
                    dropper: ldropper,
                    data: ldata,
                    len: llen,
                },
                right: VecShard {
                    dropper: rdropper,
                    data: rdata,
                    len: rlen,
                },
            })
        }
    }

    /// Merge the given shards into a single shard.
    ///
    /// This will attempt an O(1) merge like `merge_inplace` but fall back to copying slices around
    /// within their allocation and possibly allocating a new Vec if needed.
    pub fn merge(left: Self, right: Self) -> Self {
        Self::merge_noalloc(left, right).unwrap_or_else(|err| {
            let (_ldropper, ldata, llen) = err.left.into_raw_parts();
            let (_rdropper, rdata, rlen) = err.right.into_raw_parts();

            // Give up and allocate
            // SAFETY: the copies bitwise-move all elements into the new Vec;
            // the droppers only free the old buffer, never its elements.
            let mut vec = Vec::with_capacity(llen + rlen);
            unsafe {
                ptr::copy(ldata, vec.as_mut_ptr(), llen);
                ptr::copy(rdata, vec.as_mut_ptr().add(llen), rlen);
                vec.set_len(llen + rlen);
            }
            Self::from(vec)
        })
    }
}

331impl<T> ShardExt for VecShard<T> {
332 type Shard = VecShard<T>;
333
334 fn split_inplace_at(mut self, at: usize) -> (Self::Shard, Self::Shard) {
335 assert!(at <= self.len);
336
337 let right = VecShard {
338 dropper: self.dropper.clone(),
339 data: unsafe { self.data.add(at) },
340 len: self.len - at,
341 };
342
343 // for the left shard, just cut ourselves down to size
344 self.len = at;
345
346 (self, right)
347 }
348}
349
350impl<T> Drop for VecShard<T> {
351 fn drop(&mut self) {
352 // Drop all the elements
353 // The VecDropper will take care of freeing the Vec itself, if needed
354 for o in 0..self.len {
355 unsafe { ptr::drop_in_place(self.data.add(o)) };
356 }
357 }
358}
359
impl<T> Deref for VecShard<T> {
    type Target = [T];

    fn deref(&self) -> &[T] {
        // SAFETY: `data` points at `len` initialized elements that this
        // shard exclusively owns for the duration of the borrow.
        unsafe { slice::from_raw_parts(self.data, self.len) }
    }
}

impl<T> DerefMut for VecShard<T> {
    fn deref_mut(&mut self) -> &mut [T] {
        // SAFETY: as in `deref`; `&mut self` guarantees exclusive access.
        unsafe { slice::from_raw_parts_mut(self.data, self.len) }
    }
}

374impl<T, I: SliceIndex<[T]>> Index<I> for VecShard<T> {
375 type Output = <I as slice::SliceIndex<[T]>>::Output;
376
377 fn index(&self, idx: I) -> &Self::Output {
378 &((**self)[idx])
379 }
380}
381
382impl<T, I: SliceIndex<[T]>> IndexMut<I> for VecShard<T> {
383 fn index_mut(&mut self, idx: I) -> &mut Self::Output {
384 &mut ((**self)[idx])
385 }
386}
387
388impl<T: PartialEq> PartialEq for VecShard<T> {
389 fn eq(&self, rhs: &Self) -> bool {
390 **self == **rhs
391 }
392}
393
// Equality is element-wise slice equality, which is total when `T: Eq`.
impl<T: Eq> Eq for VecShard<T> {}

396impl<T> Iterator for VecShard<T> {
397 type Item = T;
398
399 fn next(&mut self) -> Option<T> {
400 if self.len > 0 {
401 let res = unsafe { self.data.read() };
402 self.len -= 1;
403 self.data = unsafe { self.data.add(1) };
404 Some(res)
405 } else {
406 None
407 }
408 }
409
410 fn size_hint(&self) -> (usize, Option<usize>) {
411 (self.len, Some(self.len))
412 }
413}
414
impl<T> ExactSizeIterator for VecShard<T> {
    /// The exact number of elements remaining in the shard.
    fn len(&self) -> usize {
        self.len
    }
}

421impl<T> DoubleEndedIterator for VecShard<T> {
422 fn next_back(&mut self) -> Option<T> {
423 if self.len > 0 {
424 self.len -= 1;
425 Some(unsafe { self.data.add(self.len).read() })
426 } else {
427 None
428 }
429 }
430}
431
// Once `len` hits 0, `next()` keeps returning `None`, so the iterator is fused.
impl<T> FusedIterator for VecShard<T> {}

434impl<T: Hash> Hash for VecShard<T> {
435 fn hash<H: Hasher>(&self, state: &mut H) {
436 Hash::hash(&**self, state)
437 }
438}
439
440impl<T> From<Vec<T>> for VecShard<T> {
441 fn from(mut v: Vec<T>) -> Self {
442 let res = VecShard {
443 dropper: Arc::new(VecDropper {
444 ptr: v.as_mut_ptr(),
445 capacity: v.capacity(),
446 }),
447 data: v.as_mut_ptr(),
448 len: v.len(),
449 };
450 mem::forget(v);
451 res
452 }
453}
454
455impl<T> Into<Vec<T>> for VecShard<T> {
456 fn into(self) -> Vec<T> {
457 // First, move everything out of self so we don't drop anything
458 let (dropper, data, len) = self.into_raw_parts();
459
460 // Optimization: if this shard is the only one left from the backing Vec, we re-use its allocation
461 if let Ok(dropper) = Arc::try_unwrap(dropper) {
462 // If our data is already at the start of the backing Vec, we don't need to move it
463 if data != dropper.ptr {
464 unsafe { ptr::copy(data, dropper.ptr, len) };
465 }
466 let v = unsafe { Vec::from_raw_parts(dropper.ptr, len, dropper.capacity) };
467 // Make sure we don't drop anything that the new Vec will need
468 mem::forget(dropper);
469 v
470 } else {
471 // Otherwise, just allocate a new Vec
472 let mut v = Vec::with_capacity(len);
473 unsafe {
474 ptr::copy_nonoverlapping(data, v.as_mut_ptr(), len);
475 v.set_len(len);
476 };
477 v
478 }
479 }
480}
481
482impl<T: Clone> Clone for VecShard<T> {
483 fn clone(&self) -> VecShard<T> {
484 // Not much we can do here, just make a new Vec
485 let mut vec = Vec::with_capacity(self.len);
486 vec.extend_from_slice(unsafe { slice::from_raw_parts(self.data, self.len) });
487 VecShard::from(vec)
488 }
489}
490
491impl<T: fmt::Debug> fmt::Debug for VecShard<T> {
492 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
493 write!(f, "{:?}", &**self)
494 }
495}
496
impl<T> ShardExt for Vec<T> {
    type Shard = VecShard<T>;

    /// Turn the whole Vec into one shard (O(1)), then split that shard.
    fn split_inplace_at(self, at: usize) -> (Self::Shard, Self::Shard) {
        let whole: VecShard<T> = self.into();
        whole.split_inplace_at(at)
    }
503}