// summavy_stacker/memory_arena.rs

//! 32-bit memory arena for types implementing `Copy`.
//! This memory arena has been implemented to fit the use of tantivy's indexer
//! and has *twisted specifications*.
//!
//! - It works on stable rust.
//! - One can get an accurate figure of the memory usage of the arena.
//! - Allocations are very cheap.
//! - Consecutive allocations are very likely to have great locality.
//! - Addresses (`Addr`) are 32 bits.
//! - Dropping the whole `MemoryArena` is cheap.
//!
//! # Limitations
//!
//! - Your objects must not implement `Drop`.
//! - `Addr` values into the arena are 32 bits. The maximum capacity of the
//!   arena is 4GB. *(Tantivy's indexer uses one arena per indexing thread.)*
//! - The arena only works for objects much smaller than `1MB`.
//!   Allocating more than `1MB` at a time will result in a panic,
//!   and allocating a lot of large objects (> 500KB) will result in fragmentation.
//! - Your objects are stored in an unaligned fashion. For this reason,
//!   the API does not let you access them as references.
//!
//! Instead, you store and access your data via `.write_at(...)` and `.read(...)`,
//! which under the hood store and load your objects using `ptr::write_unaligned`
//! and `ptr::read_unaligned`.
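//!
//! # Example
//!
//! A minimal usage sketch. The crate/module path in the doc-test below is
//! assumed from the file location:
//!
//! ```
//! use summavy_stacker::memory_arena::MemoryArena;
//!
//! let mut arena = MemoryArena::default();
//! // Reserve space for a `u64`, write it unaligned, then read it back.
//! let addr = arena.allocate_space(std::mem::size_of::<u64>());
//! arena.write_at(addr, 42u64);
//! assert_eq!(arena.read::<u64>(addr), 42u64);
//! ```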
use std::{mem, ptr};

const NUM_BITS_PAGE_ADDR: usize = 20;
const PAGE_SIZE: usize = 1 << NUM_BITS_PAGE_ADDR; // pages are 1 MB large

/// Represents a pointer into the `MemoryArena`.
///
/// Pointers are 32 bits and are split into
/// two parts.
///
/// The first 12 bits represent the id of a
/// page of memory.
///
/// The last 20 bits are an address within this page of memory.
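///
/// For example (illustrative arithmetic only, following the constants above):
///
/// ```text
/// Addr(0x0030_0064)
///   page_id         = 0x0030_0064 >> 20      = 3
///   page_local_addr = 0x0030_0064 & 0xF_FFFF = 0x64 = 100
/// ```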
#[derive(Copy, Clone, Debug)]
pub struct Addr(u32);

impl Addr {
    /// Creates a null pointer.
    pub fn null_pointer() -> Addr {
        Addr(u32::MAX)
    }

    /// Returns the `Addr` object for `addr + offset`
    pub fn offset(self, offset: u32) -> Addr {
        Addr(self.0.wrapping_add(offset))
    }

    fn new(page_id: usize, local_addr: usize) -> Addr {
        Addr((page_id << NUM_BITS_PAGE_ADDR | local_addr) as u32)
    }

    fn page_id(self) -> usize {
        (self.0 as usize) >> NUM_BITS_PAGE_ADDR
    }

    fn page_local_addr(self) -> usize {
        (self.0 as usize) & (PAGE_SIZE - 1)
    }

    /// Returns true if and only if the `Addr` is null.
    pub fn is_null(self) -> bool {
        self.0 == u32::MAX
    }
}

/// Writes `val` into `dest` as unaligned bytes.
///
/// `dest` must be exactly `size_of::<Item>()` bytes long.
pub fn store<Item: Copy + 'static>(dest: &mut [u8], val: Item) {
    assert_eq!(dest.len(), std::mem::size_of::<Item>());
    unsafe {
        ptr::write_unaligned(dest.as_mut_ptr() as *mut Item, val);
    }
}

/// Reads an `Item` back from unaligned bytes.
///
/// `data` must be exactly `size_of::<Item>()` bytes long.
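/// A round-trip sketch (the import path below is assumed from the file
/// location):
///
/// ```
/// use summavy_stacker::memory_arena::{store, load};
///
/// let mut buf = [0u8; 8];
/// store(&mut buf, 42u64); // writes 8 unaligned bytes
/// assert_eq!(load::<u64>(&buf), 42u64);
/// ```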
pub fn load<Item: Copy + 'static>(data: &[u8]) -> Item {
    assert_eq!(data.len(), std::mem::size_of::<Item>());
    unsafe { ptr::read_unaligned(data.as_ptr() as *const Item) }
}

/// The `MemoryArena`.
pub struct MemoryArena {
    pages: Vec<Page>,
}

impl Default for MemoryArena {
    fn default() -> MemoryArena {
        let first_page = Page::new(0);
        MemoryArena {
            pages: vec![first_page],
        }
    }
}

impl MemoryArena {
    fn add_page(&mut self) -> &mut Page {
        let new_page_id = self.pages.len();
        self.pages.push(Page::new(new_page_id));
        &mut self.pages[new_page_id]
    }

    /// Returns an estimate in number of bytes
    /// of resident memory consumed by the `MemoryArena`.
    ///
    /// Internally, it counts a number of `1MB` pages
    /// and therefore delivers an upper bound.
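    ///
    /// A sketch of the reported figure (the import path below is assumed
    /// from the file location):
    ///
    /// ```
    /// use summavy_stacker::memory_arena::MemoryArena;
    ///
    /// let arena = MemoryArena::default();
    /// // A fresh arena owns a single, mostly empty 1MB page.
    /// assert_eq!(arena.mem_usage(), 1 << 20);
    /// ```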
    pub fn mem_usage(&self) -> usize {
        self.pages.len() * PAGE_SIZE
    }

    /// Writes `val` in the memory arena at the given `addr`.
    pub fn write_at<Item: Copy + 'static>(&mut self, addr: Addr, val: Item) {
        let dest = self.slice_mut(addr, std::mem::size_of::<Item>());
        store(dest, val);
    }

    /// Reads an item in the memory arena at the given `addr`.
    ///
    /// # Panics
    ///
    /// Panics if `addr` is out of bounds.
    pub fn read<Item: Copy + 'static>(&self, addr: Addr) -> Item {
        load(self.slice(addr, mem::size_of::<Item>()))
    }

    pub fn slice(&self, addr: Addr, len: usize) -> &[u8] {
        self.pages[addr.page_id()].slice(addr.page_local_addr(), len)
    }

    pub fn slice_from(&self, addr: Addr) -> &[u8] {
        self.pages[addr.page_id()].slice_from(addr.page_local_addr())
    }

    #[inline]
    pub fn slice_mut(&mut self, addr: Addr, len: usize) -> &mut [u8] {
        self.pages[addr.page_id()].slice_mut(addr.page_local_addr(), len)
    }

    /// Allocates `len` bytes and returns the allocated address.
    ///
    /// # Panics
    ///
    /// Panics if `len` is greater than the page size (1MB), since the
    /// allocation cannot fit in a single page.
    pub fn allocate_space(&mut self, len: usize) -> Addr {
        let page_id = self.pages.len() - 1;
        // Try the current (last) page first; if the allocation does not
        // fit, roll over to a freshly added page.
        if let Some(addr) = self.pages[page_id].allocate_space(len) {
            return addr;
        }
        self.add_page().allocate_space(len).unwrap()
    }
}

struct Page {
    page_id: usize,
    /// Number of bytes already allocated within this page.
    len: usize,
    data: Box<[u8]>,
}

impl Page {
    fn new(page_id: usize) -> Page {
        Page {
            page_id,
            len: 0,
            data: vec![0u8; PAGE_SIZE].into_boxed_slice(),
        }
    }

    /// Returns true if `len` extra bytes still fit in this page.
    #[inline]
    fn is_available(&self, len: usize) -> bool {
        len + self.len <= PAGE_SIZE
    }

    fn slice(&self, local_addr: usize, len: usize) -> &[u8] {
        &self.slice_from(local_addr)[..len]
    }

    fn slice_from(&self, local_addr: usize) -> &[u8] {
        &self.data[local_addr..]
    }

    fn slice_mut(&mut self, local_addr: usize, len: usize) -> &mut [u8] {
        &mut self.data[local_addr..][..len]
    }

    /// Bump-allocates `len` bytes, or returns `None` if the page is full.
    fn allocate_space(&mut self, len: usize) -> Option<Addr> {
        if self.is_available(len) {
            let addr = Addr::new(self.page_id, self.len);
            self.len += len;
            Some(addr)
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {

    use super::MemoryArena;

    #[test]
    fn test_arena_allocate_slice() {
        let mut arena = MemoryArena::default();
        let a = b"hello";
        let b = b"happy tax payer";

        let addr_a = arena.allocate_space(a.len());
        arena.slice_mut(addr_a, a.len()).copy_from_slice(a);

        let addr_b = arena.allocate_space(b.len());
        arena.slice_mut(addr_b, b.len()).copy_from_slice(b);

        assert_eq!(arena.slice(addr_a, a.len()), a);
        assert_eq!(arena.slice(addr_b, b.len()), b);
    }
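
    // A sketch of an extra test exercising page rollover: two 600KB
    // allocations cannot share a single 1MB page, so the second one
    // should land on a freshly added page.
    #[test]
    fn test_arena_page_rollover() {
        let mut arena = MemoryArena::default();
        let len = 600 * 1024;
        let addr_a = arena.allocate_space(len);
        arena.slice_mut(addr_a, len).fill(1u8);
        let addr_b = arena.allocate_space(len);
        arena.slice_mut(addr_b, len).fill(2u8);
        // The arena now holds two 1MB pages.
        assert_eq!(arena.mem_usage(), 2 << 20);
        assert!(arena.slice(addr_a, len).iter().all(|&byte| byte == 1u8));
        assert!(arena.slice(addr_b, len).iter().all(|&byte| byte == 2u8));
    }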

    #[derive(Clone, Copy, Debug, Eq, PartialEq)]
    struct MyTest {
        pub a: usize,
        pub b: u8,
        pub c: u32,
    }

    #[test]
    fn test_store_object() {
        let mut arena = MemoryArena::default();
        let a = MyTest {
            a: 143,
            b: 21,
            c: 32,
        };
        let b = MyTest {
            a: 113,
            b: 221,
            c: 12,
        };

        let num_bytes = std::mem::size_of::<MyTest>();
        let addr_a = arena.allocate_space(num_bytes);
        arena.write_at(addr_a, a);

        let addr_b = arena.allocate_space(num_bytes);
        arena.write_at(addr_b, b);

        assert_eq!(arena.read::<MyTest>(addr_a), a);
        assert_eq!(arena.read::<MyTest>(addr_b), b);
    }
}