imara_diff/histogram/list_pool.rs
1use crate::histogram::MAX_CHAIN_LEN;
2
3/// A small list of entity references allocated from a pool.
4///
5/// An `ListHandle` type provides similar functionality to `Vec`, but with some important
6/// differences in the implementation:
7///
8/// 1. Memory is allocated from a `ListPool` instead of the global heap.
9/// 2. The footprint of an entity list is 4 bytes, compared with the 24 bytes for `Vec`.
10/// 3. An entity list doesn't implement `Drop`, leaving it to the pool to manage memory.
11///
12/// The list pool is intended to be used as a LIFO allocator. After building up a larger data
13/// structure with many list references, the whole thing can be discarded quickly by clearing the
14/// pool.
15///
16/// # Safety
17///
18/// Entity lists are not as safe to use as `Vec`, but they never jeopardize Rust's memory safety
19/// guarantees. These are the problems to be aware of:
20///
21/// - If you lose track of an entity list, its memory won't be recycled until the pool is cleared.
22/// This can cause the pool to grow very large with leaked lists.
23/// - If entity lists are used after their pool is cleared, they may contain garbage data, and
24/// modifying them may corrupt other lists in the pool.
25/// - If an entity list is used with two different pool instances, both pools are likely to become
26/// corrupted.
27///
28/// Entity lists can be cloned, but that operation should only be used as part of cloning the whole
29/// function they belong to. *Cloning an entity list does not allocate new memory for the clone*.
30/// It creates an alias of the same memory.
31///
32/// Entity lists cannot be hashed and compared for equality because it's not possible to compare the
33/// contents of the list without the pool reference.
34///
35/// # Implementation
36///
37/// The `ListHandle` itself is designed to have the smallest possible footprint. This is important
38/// because it is used inside very compact data structures like `InstructionData`. The list
39/// contains only a 32-bit index into the pool's memory vector, pointing to the first element of
40/// the list.
41///
42/// The pool is just a single `Vec` containing all of the allocated lists. Each list is
43/// represented as three contiguous parts:
44///
45/// 1. The number of elements in the list.
46/// 2. The list elements.
47/// 3. Excess capacity elements.
48///
49/// The total size of the three parts is always a power of two, and the excess capacity is always
50/// as small as possible. This means that shrinking a list may cause the excess capacity to shrink
51/// if a smaller power-of-two size becomes available.
52///
53/// Both growing and shrinking a list may cause it to be reallocated in the pool vector.
54///
55/// The index stored in an `ListHandle` points to part 2, the list elements. The value 0 is
56/// reserved for the empty list which isn't allocated in the vector.
57#[derive(Clone, Debug, PartialEq, Eq)]
58pub struct ListHandle {
59 index: u32,
60 generation: u32,
61 len: u32,
62}
63
64/// Create an empty list.
65impl Default for ListHandle {
66 fn default() -> Self {
67 Self {
68 index: 0,
69 generation: 0,
70 len: 0,
71 }
72 }
73}
74
75const MAX_SIZE_CLASS: SizeClass = sclass_for_length(super::MAX_CHAIN_LEN - 1);
76const NUM_SIZE_CLASS: usize = MAX_SIZE_CLASS as usize + 1;
77
78/// A memory pool for storing lists of `T`.
79#[derive(Clone, Debug)]
80pub struct ListPool {
81 // The main array containing the lists.
82 data: Vec<u32>,
83
84 // Heads of the free lists, one for each size class.
85 free: [u32; NUM_SIZE_CLASS],
86
87 generation: u32,
88}
89
90/// Lists are allocated in sizes that are powers of two, starting from 4.
91/// Each power of two is assigned a size class number, so the size is `4 << SizeClass`.
92type SizeClass = u8;
93
94/// Get the size of a given size class. The size includes the length field, so the maximum list
95/// length is one less than the class size.
96#[inline]
97const fn sclass_size(sclass: SizeClass) -> usize {
98 4 << sclass
99}
100
101/// Get the size class to use for a given list length.
102/// This always leaves room for the length element in addition to the list elements.
103#[inline]
104const fn sclass_for_length(len: u32) -> SizeClass {
105 30 - (len | 3).leading_zeros() as SizeClass
106}
107
108/// Is `len` the minimum length in its size class?
109#[inline]
110fn is_sclass_max_length(len: u32) -> bool {
111 len > 3 && len.is_power_of_two()
112}
113
114impl ListPool {
115 /// Create a new list pool.
116 pub fn new(capacity: u32) -> Self {
117 Self {
118 data: Vec::with_capacity(capacity as usize),
119 free: [u32::MAX; NUM_SIZE_CLASS],
120 generation: 1,
121 }
122 }
123
124 /// Clear the pool, forgetting about all lists that use it.
125 ///
126 /// This invalidates any existing entity lists that used this pool to allocate memory.
127 ///
128 /// The pool's memory is not released to the operating system, but kept around for faster
129 /// allocation in the future.
130 pub fn clear(&mut self) {
131 self.data.clear();
132 self.free.fill(u32::MAX);
133 self.generation += 1;
134 }
135
136 /// Allocate a storage block with a size given by `sclass`.
137 ///
138 /// Returns the first index of an available segment of `self.data` containing
139 /// `sclass_size(sclass)` elements. The allocated memory is filled with reserved
140 /// values.
141 fn alloc(&mut self, sclass: SizeClass) -> usize {
142 let freelist_head = self.free[sclass as usize];
143 // First try the free list for this size class.
144 if freelist_head == u32::MAX {
145 // Nothing on the free list. Allocate more memory.
146 let offset = self.data.len();
147 self.data.resize(offset + sclass_size(sclass), u32::MAX);
148 offset
149 } else {
150 // take allocation of the free list (linked list)
151 self.free[sclass as usize] = self.data[freelist_head as usize];
152 freelist_head as usize
153 }
154 }
155
156 /// Free a storage block with a size given by `sclass`.
157 ///
158 /// This must be a block that was previously allocated by `alloc()` with the same size class.
159 fn free(&mut self, block: usize, sclass: SizeClass) {
160 let sclass = sclass as usize;
161 // Insert the block on the free list which is a single linked list.
162 self.data[block] = self.free[sclass];
163 self.free[sclass] = block as u32
164 }
165
166 /// Returns two mutable slices representing the two requested blocks.
167 ///
168 /// The two returned slices can be longer than the blocks. Each block is located at the front
169 /// of the respective slice.
170 fn mut_slices(&mut self, block0: usize, block1: usize) -> (&mut [u32], &mut [u32]) {
171 if block0 < block1 {
172 let (s0, s1) = self.data.split_at_mut(block1);
173 (&mut s0[block0..], s1)
174 } else {
175 let (s1, s0) = self.data.split_at_mut(block0);
176 (s0, &mut s1[block1..])
177 }
178 }
179
180 /// Reallocate a block to a different size class.
181 ///
182 /// Copy `elems_to_copy` elements from the old to the new block.
183 fn realloc(
184 &mut self,
185 block: usize,
186 from_sclass: SizeClass,
187 to_sclass: SizeClass,
188 elems_to_copy: usize,
189 ) -> usize {
190 debug_assert!(elems_to_copy <= sclass_size(from_sclass));
191 debug_assert!(elems_to_copy <= sclass_size(to_sclass));
192 let new_block = self.alloc(to_sclass);
193
194 let (old, new) = self.mut_slices(block, new_block);
195 new[0..elems_to_copy].copy_from_slice(&old[0..elems_to_copy]);
196
197 self.free(block, from_sclass);
198 new_block
199 }
200}
201
202impl ListHandle {
203 /// Get the number of elements in the list.
204 #[allow(clippy::len_without_is_empty)]
205 pub fn len(&self, pool: &ListPool) -> u32 {
206 if self.generation == pool.generation {
207 self.len
208 } else {
209 0
210 }
211 }
212
213 /// Get the list as a slice.
214 pub fn as_slice<'a>(&'a self, pool: &'a ListPool) -> &'a [u32] {
215 let idx = self.index as usize;
216 match self.len(pool) {
217 0 => &[],
218 1 => std::slice::from_ref(&self.index),
219 len => &pool.data[idx..idx + len as usize],
220 }
221 }
222
223 /// Appends an element to the back of the list.
224 /// Returns the index where the element was inserted.
225 pub fn push(&mut self, element: u32, pool: &mut ListPool) {
226 let len = self.len(pool);
227 match len {
228 0 => {
229 self.generation = pool.generation;
230 self.index = element;
231 self.len = 1;
232 }
233 1 => {
234 // This is an empty list. Allocate a block and set length=1.
235 let block = pool.alloc(0);
236 pool.data[block] = self.index;
237 pool.data[block + 1] = element;
238 self.index = block as u32;
239 self.len = 2;
240 }
241 2..=MAX_CHAIN_LEN => {
242 // Do we need to reallocate?
243 let block;
244 let idx = self.index as usize;
245 if is_sclass_max_length(len) {
246 // Reallocate, preserving length + all old elements.
247 let sclass = sclass_for_length(len);
248 block = pool.realloc(idx, sclass - 1, sclass, len as usize);
249 self.index = block as u32;
250 } else {
251 block = idx;
252 }
253 pool.data[block + len as usize] = element;
254 self.len += 1;
255 }
256
257 // ignore elements longer then MAX_CHAIN_LEN
258 // these are rarely relevant and if they are we fall back to myers
259 _ => (),
260 }
261 }
262}