// shale/lib.rs

1use parking_lot::Mutex;
2use std::cell::UnsafeCell;
3use std::collections::{HashMap, HashSet};
4use std::fmt;
5use std::marker::PhantomData;
6use std::num::NonZeroUsize;
7use std::ops::Deref;
8use std::rc::Rc;
9use std::sync::Arc;
10
11pub mod block;
12pub mod compact;
13pub mod util;
14
/// Errors that can arise from [MemStore]/[ShaleStore] operations.
#[derive(Debug)]
pub enum ShaleError {
    /// The backing linear memory store could not serve the request.
    LinearMemStoreError,
    /// Raw bytes could not be decoded into the typed item.
    DecodeError,
    /// The object is already pinned by another [ObjRef].
    ObjRefAlreadyInUse,
    /// The [ObjPtr] does not reference a valid item.
    ObjPtrInvalid,
    /// Taking a slice of an object failed (e.g. the parent object has unflushed writes).
    SliceError,
}
23
/// Identifier of a linear storage space.
pub type SpaceID = u8;
/// Sentinel identifier meaning "no valid space".
pub const INVALID_SPACE_ID: SpaceID = 0xff;
26
/// A buffered write destined for some linear space on disk.
pub struct DiskWrite {
    /// Which space the write targets.
    pub space_id: SpaceID,
    /// Byte offset within that space.
    pub space_off: u64,
    /// The bytes to be written at `space_off`.
    pub data: Box<[u8]>,
}
32
33impl std::fmt::Debug for DiskWrite {
34    fn fmt(
35        &self, f: &mut std::fmt::Formatter<'_>,
36    ) -> Result<(), std::fmt::Error> {
37        write!(
38            f,
39            "[Disk space=0x{:02x} offset=0x{:04x} data=0x{}",
40            self.space_id,
41            self.space_off,
42            hex::encode(&self.data)
43        )
44    }
45}
46
/// A handle that pins and provides a readable access to a portion of the linear memory image.
/// The pinned bytes are read through the `Deref<Target = [u8]>` supertrait.
pub trait MemView: Deref<Target = [u8]> {}
49
/// In-memory store that offers access to intervals from a linear byte space, which is usually
/// backed by a cached/memory-mapped pool of the accessed intervals from the underlying linear
/// persistent store. Reads could trigger disk reads to bring data into memory, but writes will
/// *only* be visible in memory (it does not write back to the disk).
pub trait MemStore {
    /// Returns a handle that pins the `length` of bytes starting from `offset` and makes them
    /// directly accessible. Returns `None` if the interval cannot be served.
    fn get_view(&self, offset: u64, length: u64) -> Option<Box<dyn MemView>>;
    /// Returns a handle that allows shared access to the store.
    fn get_shared(&self) -> Option<Box<dyn Deref<Target = dyn MemStore>>>;
    /// Write the `change` to the portion of the linear space starting at `offset`. The change
    /// should be immediately visible to all `MemView` associated to this linear space.
    fn write(&self, offset: u64, change: &[u8]);
    /// Returns the identifier of this storage space.
    fn id(&self) -> SpaceID;
}
66
/// Opaque typed pointer in the 64-bit virtual addressable space.
#[repr(C)]
pub struct ObjPtr<T: ?Sized> {
    /// Raw address in the linear space; 0 is reserved as the null pointer.
    pub(crate) addr: u64,
    /// Marks the pointee type without storing a value of it.
    phantom: PhantomData<T>,
}
73
74impl<T> std::cmp::PartialEq for ObjPtr<T> {
75    fn eq(&self, other: &ObjPtr<T>) -> bool {
76        self.addr == other.addr
77    }
78}
79
80impl<T> Eq for ObjPtr<T> {}
81impl<T> Copy for ObjPtr<T> {}
82impl<T> Clone for ObjPtr<T> {
83    fn clone(&self) -> Self {
84        *self
85    }
86}
87
88impl<T> std::hash::Hash for ObjPtr<T> {
89    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
90        self.addr.hash(state)
91    }
92}
93
94impl<T: ?Sized> fmt::Display for ObjPtr<T> {
95    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
96        write!(f, "[ObjPtr addr={:08x}]", self.addr)
97    }
98}
99
impl<T: ?Sized> ObjPtr<T> {
    /// The null pointer (address 0).
    pub fn null() -> Self {
        Self::new(0)
    }
    /// Whether this pointer is null (address 0).
    pub fn is_null(&self) -> bool {
        self.addr == 0
    }
    /// Raw address of the pointee in the linear space.
    pub fn addr(&self) -> u64 {
        self.addr
    }

    #[inline(always)]
    pub(crate) fn new(addr: u64) -> Self {
        ObjPtr {
            addr,
            phantom: PhantomData,
        }
    }

    /// Construct a typed pointer from a raw address.
    ///
    /// # Safety
    /// The caller must ensure `addr` actually references a valid item of type
    /// `T` in the intended store; no validation is performed here.
    #[inline(always)]
    pub unsafe fn new_from_addr(addr: u64) -> Self {
        Self::new(addr)
    }
}
124
/// An addressed, typed, and read-writable handle for the stored item in [ShaleStore]. The object
/// represents the decoded/mapped data. The implementation of [ShaleStore] could use [ObjCache] to
/// turn a `TypedView` into an [ObjRef].
pub trait TypedView<T: ?Sized>: Deref<Target = T> {
    /// Get the offset of the initial byte in the linear space.
    fn get_offset(&self) -> u64;
    /// Access it as a [MemStore] object.
    fn get_mem_store(&self) -> &dyn MemStore;
    /// Estimate the serialized length of the current type content. It should not be smaller than
    /// the actual length. Returns `None` when no valid size can be produced.
    fn estimate_mem_image(&self) -> Option<u64>;
    /// Serialize the type content to the memory image. It defines how the current in-memory object
    /// of `T` should be represented in the linear storage space.
    fn write_mem_image(&self, mem_image: &mut [u8]);
    /// Gain mutable access to the typed content. By changing it, its serialized bytes (and length)
    /// could change.
    fn write(&mut self) -> &mut T;
    /// Returns if the typed content is memory-mapped (i.e., all changes through `write` are auto
    /// reflected in the underlying [MemStore]).
    fn is_mem_mapped(&self) -> bool;
}
146
/// A wrapper of `TypedView` to enable writes. The direct construction (by [Obj::from_typed_view]
/// or [MummyObj::ptr_to_obj]) could be useful for some unsafe access to a low-level item (e.g.
/// headers/metadata at bootstrap or part of [ShaleStore] implementation) stored at a given [ObjPtr]
/// . Users of [ShaleStore] implementation, however, will only use [ObjRef] for safeguarded access.
pub struct Obj<T: ?Sized> {
    /// The decoded view of the stored item.
    value: Box<dyn TypedView<T>>,
    /// Serialized length pending from the last successful write; `None` when clean.
    dirty: Option<u64>,
}
155
156impl<T: ?Sized> Obj<T> {
157    #[inline(always)]
158    pub fn as_ptr(&self) -> ObjPtr<T> {
159        ObjPtr::<T>::new(self.value.get_offset())
160    }
161}
162
163impl<T: ?Sized> Obj<T> {
164    /// Write to the underlying object. Returns `Some(())` on success.
165    #[inline]
166    pub fn write(&mut self, modify: impl FnOnce(&mut T) -> ()) -> Option<()> {
167        modify(self.value.write());
168        // if `estimate_mem_image` gives overflow, the object will not be written
169        self.dirty = None;
170        Some(self.dirty = Some(self.value.estimate_mem_image()?))
171    }
172
173    #[inline(always)]
174    pub fn get_space_id(&self) -> SpaceID {
175        self.value.get_mem_store().id()
176    }
177
178    #[inline(always)]
179    pub fn from_typed_view(value: Box<dyn TypedView<T>>) -> Self {
180        Obj { value, dirty: None }
181    }
182
183    pub fn flush_dirty(&mut self) {
184        if !self.value.is_mem_mapped() {
185            if let Some(new_value_len) = self.dirty.take() {
186                let mut new_value = vec![0; new_value_len as usize];
187                self.value.write_mem_image(&mut new_value);
188                self.value
189                    .get_mem_store()
190                    .write(self.value.get_offset(), &new_value);
191            }
192        }
193    }
194}
195
impl<T: ?Sized> Drop for Obj<T> {
    /// Ensure any pending change is written back before the handle disappears.
    fn drop(&mut self) {
        self.flush_dirty()
    }
}
201
202impl<T> Deref for Obj<T> {
203    type Target = T;
204    fn deref(&self) -> &T {
205        &*self.value
206    }
207}
208
/// User handle that offers read & write access to the stored [ShaleStore] item.
pub struct ObjRef<T> {
    /// Always `Some` while the reference is alive; taken out during drop.
    inner: Option<Obj<T>>,
    /// The cache this reference was checked out from (receives the object back on drop).
    cache: ObjCache<T>,
}
214
impl<T> ObjRef<T> {
    /// Write through to the underlying object and record its pointer as dirty in
    /// the owning cache so a later [ObjCache::flush_dirty] can persist it.
    /// Returns `None` when the object's `write` fails (no valid serialized size).
    #[inline]
    pub fn write(&mut self, modify: impl FnOnce(&mut T) -> ()) -> Option<()> {
        // `inner` is always `Some` while the reference is alive.
        let inner = self.inner.as_mut().unwrap();
        inner.write(modify)?;
        self.cache.0.lock().dirty.insert(inner.as_ptr());
        Some(())
    }
}
224
impl<T> Deref for ObjRef<T> {
    type Target = Obj<T>;
    /// Borrow the pinned object. `inner` is only `None` transiently during drop,
    /// so the unwrap cannot fail for a live reference.
    fn deref(&self) -> &Obj<T> {
        self.inner.as_ref().unwrap()
    }
}
231
impl<T> Drop for ObjRef<T> {
    /// Unpin the object and either discard it (if it was evicted via
    /// [ObjCache::pop] while pinned) or return it to the LRU cache.
    fn drop(&mut self) {
        let mut inner = self.inner.take().unwrap();
        let ptr = inner.as_ptr();
        let mut cache = self.cache.0.lock();
        // The pin entry must exist: it was inserted by ObjCache::get/put.
        if cache.pinned.remove(&ptr).unwrap() {
            // `true` means pop() evicted this pointer while it was pinned:
            // drop the pending write instead of re-caching the object.
            inner.dirty = None;
        } else {
            // Normal path: hand the object back to the cache for reuse.
            cache.cached.put(ptr, inner);
        }
    }
}
244
245/// A persistent item storage backed by linear logical space. New items can be created and old
246/// items could be retrieved or dropped.
247pub trait ShaleStore<T> {
248    /// Dereference [ObjPtr] to a unique handle that allows direct access to the item in memory.
249    fn get_item<'a>(&'a self, ptr: ObjPtr<T>) -> Result<ObjRef<T>, ShaleError>;
250    /// Allocate a new item.
251    fn put_item<'a>(
252        &'a self, item: T, extra: u64,
253    ) -> Result<ObjRef<T>, ShaleError>;
254    /// Free an item and recycle its space when applicable.
255    fn free_item(&mut self, item: ObjPtr<T>) -> Result<(), ShaleError>;
256    /// Flush all dirty writes.
257    fn flush_dirty(&self) -> Option<()>;
258}
259
/// A stored item type that can be decoded from or encoded to on-disk raw bytes. An efficient
/// implementation could be directly transmuting to/from a POD struct. But sometimes necessary
/// compression/decompression is needed to reduce disk I/O and facilitate faster in-memory access.
pub trait MummyItem {
    /// Exact number of bytes `dehydrate` will produce.
    fn dehydrated_len(&self) -> u64;
    /// Serialize the item into `to`, which should hold at least `dehydrated_len()` bytes.
    fn dehydrate(&self, to: &mut [u8]);
    /// Decode an item from the store starting at linear address `addr`.
    fn hydrate(addr: u64, mem: &dyn MemStore) -> Result<Self, ShaleError>
    where
        Self: Sized;
    /// Whether in-memory mutations are automatically reflected in the backing
    /// store (defaults to `false`, i.e. an explicit flush is needed).
    fn is_mem_mapped(&self) -> bool {
        false
    }
}
273
274pub fn to_dehydrated(item: &dyn MummyItem) -> Vec<u8> {
275    let mut buff = vec![0; item.dehydrated_len() as usize];
276    item.dehydrate(&mut buff);
277    buff
278}
279
/// Reference implementation of [TypedView]. It takes any type that implements [MummyItem] and
/// should be useful for most applications.
pub struct MummyObj<T> {
    /// The decoded in-memory value.
    decoded: T,
    /// Shared handle keeping the backing store accessible.
    mem: Box<dyn Deref<Target = dyn MemStore>>,
    /// Start of the item's bytes in the linear space.
    offset: u64,
    /// Maximum serialized length allowed for this item.
    len_limit: u64,
}
288
impl<T> Deref for MummyObj<T> {
    type Target = T;
    /// Read-only access to the decoded value.
    fn deref(&self) -> &T {
        &self.decoded
    }
}
295
296impl<T: MummyItem> TypedView<T> for MummyObj<T> {
297    fn get_offset(&self) -> u64 {
298        self.offset
299    }
300
301    fn get_mem_store(&self) -> &dyn MemStore {
302        &**self.mem
303    }
304
305    fn estimate_mem_image(&self) -> Option<u64> {
306        let len = self.decoded.dehydrated_len();
307        if len as u64 > self.len_limit {
308            None
309        } else {
310            Some(len)
311        }
312    }
313
314    fn write_mem_image(&self, mem_image: &mut [u8]) {
315        self.decoded.dehydrate(mem_image)
316    }
317
318    fn write(&mut self) -> &mut T {
319        &mut self.decoded
320    }
321    fn is_mem_mapped(&self) -> bool {
322        self.decoded.is_mem_mapped()
323    }
324}
325
impl<T: MummyItem + 'static> MummyObj<T> {
    /// Hydrate the item at `offset` and bundle it with a shared store handle.
    #[inline(always)]
    fn new(
        offset: u64, len_limit: u64, space: &dyn MemStore,
    ) -> Result<Self, ShaleError> {
        let decoded = T::hydrate(offset, space)?;
        Ok(Self {
            offset,
            decoded,
            mem: space.get_shared().ok_or(ShaleError::LinearMemStoreError)?,
            len_limit,
        })
    }

    /// Wrap an already-decoded item without re-reading it from the store.
    #[inline(always)]
    fn from_hydrated(
        offset: u64, len_limit: u64, decoded: T, space: &dyn MemStore,
    ) -> Result<Self, ShaleError> {
        Ok(Self {
            offset,
            decoded,
            mem: space.get_shared().ok_or(ShaleError::LinearMemStoreError)?,
            len_limit,
        })
    }

    /// Decode the item `ptr` points to and return a writable [Obj] handle.
    ///
    /// # Safety
    /// The caller must ensure `ptr` references a valid, properly encoded `T`
    /// within `store`; the address is not validated here.
    #[inline(always)]
    pub unsafe fn ptr_to_obj(
        store: &dyn MemStore, ptr: ObjPtr<T>, len_limit: u64,
    ) -> Result<Obj<T>, ShaleError> {
        Ok(Obj::from_typed_view(Box::new(Self::new(
            ptr.addr(),
            len_limit,
            store,
        )?)))
    }

    /// Wrap an already-decoded item located at `addr` into a writable [Obj] handle.
    ///
    /// # Safety
    /// The caller must ensure `addr` is the true location of `decoded` within
    /// `store` — later flushes will write the serialized item to that address.
    #[inline(always)]
    pub unsafe fn item_to_obj(
        store: &dyn MemStore, addr: u64, len_limit: u64, decoded: T,
    ) -> Result<Obj<T>, ShaleError> {
        Ok(Obj::from_typed_view(Box::new(Self::from_hydrated(
            addr, len_limit, decoded, store,
        )?)))
    }
}
372
impl<T> MummyObj<T> {
    /// Like `from_hydrated` but without the `MummyItem` bound; used only for slices.
    ///
    /// # Safety
    /// The caller must guarantee `decoded` matches the bytes at `offset`.
    unsafe fn new_from_slice(
        offset: u64, len_limit: u64, decoded: T, space: &dyn MemStore,
    ) -> Result<Self, ShaleError> {
        Ok(Self {
            offset,
            decoded,
            mem: space.get_shared().ok_or(ShaleError::LinearMemStoreError)?,
            len_limit,
        })
    }

    /// Create a sub-object covering `length` bytes at relative `offset` inside `s`.
    /// Fails with [ShaleError::SliceError] if `s` has an unflushed write, since
    /// slicing a dirty parent could observe or clobber stale bytes.
    ///
    /// # Safety
    /// The caller must ensure `decoded` is the correct decoding of that byte range.
    pub unsafe fn slice<U: MummyItem + 'static>(
        s: &Obj<T>, offset: u64, length: u64, decoded: U,
    ) -> Result<Obj<U>, ShaleError> {
        // Absolute address of the slice within the linear space.
        let addr_ = s.value.get_offset() + offset;
        if s.dirty.is_some() {
            return Err(ShaleError::SliceError)
        }
        let r = Box::new(MummyObj::new_from_slice(
            addr_,
            length,
            decoded,
            s.value.get_mem_store(),
        )?);
        Ok(Obj {
            value: r,
            dirty: None,
        })
    }
}
404
impl<T> ObjPtr<T> {
    /// Serialized size of a pointer: 8 bytes (a little-endian `u64` address).
    const MSIZE: u64 = 8;
}
408
409impl<T> MummyItem for ObjPtr<T> {
410    fn dehydrated_len(&self) -> u64 {
411        Self::MSIZE
412    }
413
414    fn dehydrate(&self, to: &mut [u8]) {
415        use std::io::{Cursor, Write};
416        Cursor::new(to)
417            .write_all(&self.addr().to_le_bytes())
418            .unwrap();
419    }
420
421    fn hydrate(addr: u64, mem: &dyn MemStore) -> Result<Self, ShaleError> {
422        let raw = mem
423            .get_view(addr, Self::MSIZE)
424            .ok_or(ShaleError::LinearMemStoreError)?;
425        unsafe {
426            Ok(Self::new_from_addr(u64::from_le_bytes(
427                (**raw).try_into().unwrap(),
428            )))
429        }
430    }
431}
432
/// Purely volatile, vector-based implementation for [MemStore]. This is good for testing or trying
/// out stuff (persistent data structures) built on [ShaleStore] in memory, without having to write
/// your own [MemStore] implementation.
pub struct PlainMem {
    /// The backing byte vector, shared between clones and views of this store.
    /// NOTE(review): `Rc<UnsafeCell<..>>` is single-threaded and unsynchronized.
    space: Rc<UnsafeCell<Vec<u8>>>,
    /// Identifier reported by [MemStore::id].
    id: SpaceID,
}
440
441impl PlainMem {
442    pub fn new(size: u64, id: SpaceID) -> Self {
443        let mut space = Vec::new();
444        space.resize(size as usize, 0);
445        let space = Rc::new(UnsafeCell::new(space));
446        Self { space, id }
447    }
448
449    fn get_space_mut(&self) -> &mut Vec<u8> {
450        unsafe { &mut *self.space.get() }
451    }
452}
453
454impl MemStore for PlainMem {
455    fn get_view(&self, offset: u64, length: u64) -> Option<Box<dyn MemView>> {
456        let offset = offset as usize;
457        let length = length as usize;
458        if offset + length > self.get_space_mut().len() {
459            None
460        } else {
461            Some(Box::new(PlainMemView {
462                offset,
463                length,
464                mem: Self {
465                    space: self.space.clone(),
466                    id: self.id,
467                },
468            }))
469        }
470    }
471
472    fn get_shared(&self) -> Option<Box<dyn Deref<Target = dyn MemStore>>> {
473        Some(Box::new(PlainMemShared(Self {
474            space: self.space.clone(),
475            id: self.id,
476        })))
477    }
478
479    fn write(&self, offset: u64, change: &[u8]) {
480        let offset = offset as usize;
481        let length = change.len();
482        self.get_space_mut()[offset..offset + length].copy_from_slice(change)
483    }
484
485    fn id(&self) -> SpaceID {
486        self.id
487    }
488}
489
/// A pinned window into a [PlainMem] space.
struct PlainMemView {
    /// Start of the window within the space.
    offset: usize,
    /// Number of bytes visible through the window.
    length: usize,
    /// A clone of the store (shares the same `Rc`-backed byte vector).
    mem: PlainMem,
}

/// Shared-access wrapper returned by [PlainMem]'s `get_shared`.
struct PlainMemShared(PlainMem);
497
impl Deref for PlainMemView {
    type Target = [u8];
    fn deref(&self) -> &[u8] {
        // NOTE(review): this borrows through the UnsafeCell while writers may
        // exist elsewhere; it relies on PlainMem's single-threaded, short-lived
        // borrow discipline — confirm before reusing in other contexts.
        &self.mem.get_space_mut()[self.offset..self.offset + self.length]
    }
}
504
505impl Deref for PlainMemShared {
506    type Target = dyn MemStore;
507    fn deref(&self) -> &(dyn MemStore + 'static) {
508        &self.0
509    }
510}
511
// The view needs no extra pinning work: the `Rc` inside its `PlainMem` keeps the space alive.
impl MemView for PlainMemView {}
513
struct ObjCacheInner<T: ?Sized> {
    /// Unpinned objects, evicted in LRU order.
    cached: lru::LruCache<ObjPtr<T>, Obj<T>>,
    /// Currently checked-out objects; the flag is set to `true` by `pop` to
    /// signal "do not re-cache on drop".
    pinned: HashMap<ObjPtr<T>, bool>,
    /// Pointers with writes awaiting `flush_dirty`.
    dirty: HashSet<ObjPtr<T>>,
}
519
/// [ObjRef] pool that is used by [ShaleStore] implementation to construct [ObjRef]s.
/// The shared `Arc` lets every outstanding [ObjRef] hold a handle back to its cache.
pub struct ObjCache<T: ?Sized>(Arc<Mutex<ObjCacheInner<T>>>);
522
523impl<T> ObjCache<T> {
524    pub fn new(capacity: usize) -> Self {
525        Self(Arc::new(Mutex::new(ObjCacheInner {
526            cached: lru::LruCache::new(
527                NonZeroUsize::new(capacity).expect("non-zero cache size"),
528            ),
529            pinned: HashMap::new(),
530            dirty: HashSet::new(),
531        })))
532    }
533
534    #[inline(always)]
535    pub fn get(&self, ptr: ObjPtr<T>) -> Result<Option<ObjRef<T>>, ShaleError> {
536        let inner = &mut self.0.lock();
537        if let Some(r) = inner.cached.pop(&ptr) {
538            if inner.pinned.insert(ptr, false).is_some() {
539                return Err(ShaleError::ObjRefAlreadyInUse)
540            }
541            return Ok(Some(ObjRef {
542                inner: Some(r),
543                cache: Self(self.0.clone()),
544            }))
545        }
546        Ok(None)
547    }
548
549    #[inline(always)]
550    pub fn put(&self, inner: Obj<T>) -> ObjRef<T> {
551        let ptr = inner.as_ptr();
552        self.0.lock().pinned.insert(ptr, false);
553        ObjRef {
554            inner: Some(inner),
555            cache: Self(self.0.clone()),
556        }
557    }
558
559    #[inline(always)]
560    pub fn pop(&self, ptr: ObjPtr<T>) {
561        let mut inner = self.0.lock();
562        if let Some(f) = inner.pinned.get_mut(&ptr) {
563            *f = true
564        }
565        if let Some(mut r) = inner.cached.pop(&ptr) {
566            r.dirty = None
567        }
568        inner.dirty.remove(&ptr);
569    }
570
571    pub fn flush_dirty(&self) -> Option<()> {
572        let mut inner = self.0.lock();
573        if !inner.pinned.is_empty() {
574            return None
575        }
576        for ptr in std::mem::replace(&mut inner.dirty, HashSet::new()) {
577            if let Some(r) = inner.cached.peek_mut(&ptr) {
578                r.flush_dirty()
579            }
580        }
581        Some(())
582    }
583}