imara_diff/histogram/
list_pool.rs

1use crate::histogram::MAX_CHAIN_LEN;
2
3/// A small list of entity references allocated from a pool.
4///
5/// An `ListHandle` type provides similar functionality to `Vec`, but with some important
6/// differences in the implementation:
7///
8/// 1. Memory is allocated from a `ListPool` instead of the global heap.
9/// 2. The footprint of an entity list is 4 bytes, compared with the 24 bytes for `Vec`.
10/// 3. An entity list doesn't implement `Drop`, leaving it to the pool to manage memory.
11///
12/// The list pool is intended to be used as a LIFO allocator. After building up a larger data
13/// structure with many list references, the whole thing can be discarded quickly by clearing the
14/// pool.
15///
16/// # Safety
17///
18/// Entity lists are not as safe to use as `Vec`, but they never jeopardize Rust's memory safety
19/// guarantees. These are the problems to be aware of:
20///
21/// - If you lose track of an entity list, its memory won't be recycled until the pool is cleared.
22///   This can cause the pool to grow very large with leaked lists.
23/// - If entity lists are used after their pool is cleared, they may contain garbage data, and
24///   modifying them may corrupt other lists in the pool.
25/// - If an entity list is used with two different pool instances, both pools are likely to become
26///   corrupted.
27///
28/// Entity lists can be cloned, but that operation should only be used as part of cloning the whole
29/// function they belong to. *Cloning an entity list does not allocate new memory for the clone*.
30/// It creates an alias of the same memory.
31///
32/// Entity lists cannot be hashed and compared for equality because it's not possible to compare the
33/// contents of the list without the pool reference.
34///
35/// # Implementation
36///
37/// The `ListHandle` itself is designed to have the smallest possible footprint. This is important
38/// because it is used inside very compact data structures like `InstructionData`. The list
39/// contains only a 32-bit index into the pool's memory vector, pointing to the first element of
40/// the list.
41///
42/// The pool is just a single `Vec` containing all of the allocated lists. Each list is
43/// represented as three contiguous parts:
44///
45/// 1. The number of elements in the list.
46/// 2. The list elements.
47/// 3. Excess capacity elements.
48///
49/// The total size of the three parts is always a power of two, and the excess capacity is always
50/// as small as possible. This means that shrinking a list may cause the excess capacity to shrink
51/// if a smaller power-of-two size becomes available.
52///
53/// Both growing and shrinking a list may cause it to be reallocated in the pool vector.
54///
55/// The index stored in an `ListHandle` points to part 2, the list elements. The value 0 is
56/// reserved for the empty list which isn't allocated in the vector.
57#[derive(Clone, Debug, PartialEq, Eq)]
58pub struct ListHandle {
59    index: u32,
60    generation: u32,
61    len: u32,
62}
63
64/// Create an empty list.
65impl Default for ListHandle {
66    fn default() -> Self {
67        Self {
68            index: 0,
69            generation: 0,
70            len: 0,
71        }
72    }
73}
74
75const MAX_SIZE_CLASS: SizeClass = sclass_for_length(super::MAX_CHAIN_LEN - 1);
76const NUM_SIZE_CLASS: usize = MAX_SIZE_CLASS as usize + 1;
77
78/// A memory pool for storing lists of `T`.
79#[derive(Clone, Debug)]
80pub struct ListPool {
81    // The main array containing the lists.
82    data: Vec<u32>,
83
84    // Heads of the free lists, one for each size class.
85    free: [u32; NUM_SIZE_CLASS],
86
87    generation: u32,
88}
89
90/// Lists are allocated in sizes that are powers of two, starting from 4.
91/// Each power of two is assigned a size class number, so the size is `4 << SizeClass`.
92type SizeClass = u8;
93
94/// Get the size of a given size class. The size includes the length field, so the maximum list
95/// length is one less than the class size.
96#[inline]
97const fn sclass_size(sclass: SizeClass) -> usize {
98    4 << sclass
99}
100
101/// Get the size class to use for a given list length.
102/// This always leaves room for the length element in addition to the list elements.
103#[inline]
104const fn sclass_for_length(len: u32) -> SizeClass {
105    30 - (len | 3).leading_zeros() as SizeClass
106}
107
108/// Is `len` the minimum length in its size class?
109#[inline]
110fn is_sclass_max_length(len: u32) -> bool {
111    len > 3 && len.is_power_of_two()
112}
113
114impl ListPool {
115    /// Create a new list pool.
116    pub fn new(capacity: u32) -> Self {
117        Self {
118            data: Vec::with_capacity(capacity as usize),
119            free: [u32::MAX; NUM_SIZE_CLASS],
120            generation: 1,
121        }
122    }
123
124    /// Clear the pool, forgetting about all lists that use it.
125    ///
126    /// This invalidates any existing entity lists that used this pool to allocate memory.
127    ///
128    /// The pool's memory is not released to the operating system, but kept around for faster
129    /// allocation in the future.
130    pub fn clear(&mut self) {
131        self.data.clear();
132        self.free.fill(u32::MAX);
133        self.generation += 1;
134    }
135
136    /// Allocate a storage block with a size given by `sclass`.
137    ///
138    /// Returns the first index of an available segment of `self.data` containing
139    /// `sclass_size(sclass)` elements. The allocated memory is filled with reserved
140    /// values.
141    fn alloc(&mut self, sclass: SizeClass) -> usize {
142        let freelist_head = self.free[sclass as usize];
143        // First try the free list for this size class.
144        if freelist_head == u32::MAX {
145            // Nothing on the free list. Allocate more memory.
146            let offset = self.data.len();
147            self.data.resize(offset + sclass_size(sclass), u32::MAX);
148            offset
149        } else {
150            // take allocation of the free list (linked list)
151            self.free[sclass as usize] = self.data[freelist_head as usize];
152            freelist_head as usize
153        }
154    }
155
156    /// Free a storage block with a size given by `sclass`.
157    ///
158    /// This must be a block that was previously allocated by `alloc()` with the same size class.
159    fn free(&mut self, block: usize, sclass: SizeClass) {
160        let sclass = sclass as usize;
161        // Insert the block on the free list which is a single linked list.
162        self.data[block] = self.free[sclass];
163        self.free[sclass] = block as u32
164    }
165
166    /// Returns two mutable slices representing the two requested blocks.
167    ///
168    /// The two returned slices can be longer than the blocks. Each block is located at the front
169    /// of the respective slice.
170    fn mut_slices(&mut self, block0: usize, block1: usize) -> (&mut [u32], &mut [u32]) {
171        if block0 < block1 {
172            let (s0, s1) = self.data.split_at_mut(block1);
173            (&mut s0[block0..], s1)
174        } else {
175            let (s1, s0) = self.data.split_at_mut(block0);
176            (s0, &mut s1[block1..])
177        }
178    }
179
180    /// Reallocate a block to a different size class.
181    ///
182    /// Copy `elems_to_copy` elements from the old to the new block.
183    fn realloc(
184        &mut self,
185        block: usize,
186        from_sclass: SizeClass,
187        to_sclass: SizeClass,
188        elems_to_copy: usize,
189    ) -> usize {
190        debug_assert!(elems_to_copy <= sclass_size(from_sclass));
191        debug_assert!(elems_to_copy <= sclass_size(to_sclass));
192        let new_block = self.alloc(to_sclass);
193
194        let (old, new) = self.mut_slices(block, new_block);
195        new[0..elems_to_copy].copy_from_slice(&old[0..elems_to_copy]);
196
197        self.free(block, from_sclass);
198        new_block
199    }
200}
201
202impl ListHandle {
203    /// Get the number of elements in the list.
204    #[allow(clippy::len_without_is_empty)]
205    pub fn len(&self, pool: &ListPool) -> u32 {
206        if self.generation == pool.generation {
207            self.len
208        } else {
209            0
210        }
211    }
212
213    /// Get the list as a slice.
214    pub fn as_slice<'a>(&'a self, pool: &'a ListPool) -> &'a [u32] {
215        let idx = self.index as usize;
216        match self.len(pool) {
217            0 => &[],
218            1 => std::slice::from_ref(&self.index),
219            len => &pool.data[idx..idx + len as usize],
220        }
221    }
222
223    /// Appends an element to the back of the list.
224    /// Returns the index where the element was inserted.
225    pub fn push(&mut self, element: u32, pool: &mut ListPool) {
226        let len = self.len(pool);
227        match len {
228            0 => {
229                self.generation = pool.generation;
230                self.index = element;
231                self.len = 1;
232            }
233            1 => {
234                // This is an empty list. Allocate a block and set length=1.
235                let block = pool.alloc(0);
236                pool.data[block] = self.index;
237                pool.data[block + 1] = element;
238                self.index = block as u32;
239                self.len = 2;
240            }
241            2..=MAX_CHAIN_LEN => {
242                // Do we need to reallocate?
243                let block;
244                let idx = self.index as usize;
245                if is_sclass_max_length(len) {
246                    // Reallocate, preserving length + all old elements.
247                    let sclass = sclass_for_length(len);
248                    block = pool.realloc(idx, sclass - 1, sclass, len as usize);
249                    self.index = block as u32;
250                } else {
251                    block = idx;
252                }
253                pool.data[block + len as usize] = element;
254                self.len += 1;
255            }
256
257            // ignore elements longer then MAX_CHAIN_LEN
258            // these are rarely relevant and if they are we fall back to myers
259            _ => (),
260        }
261    }
262}