1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
use crate::{
arenas::atomic_bucket::{AtomicBucket, AtomicBucketList},
Capacity, LassoError, LassoErrorKind, LassoResult, MemoryLimits,
};
use core::{
fmt::{self, Debug},
num::NonZeroUsize,
slice, str,
sync::atomic::{AtomicUsize, Ordering},
};
/// An arena allocator that dynamically grows in size when needed, allocating memory in large chunks
pub(crate) struct LockfreeArena {
/// All the internal buckets, storing all allocated and unallocated items
// TODO: We could keep around a second list of buckets to store filled buckets
// in to keep us from having to iterate over them, need more tests to
// see what the impact of that is
buckets: AtomicBucketList,
/// The default capacity of each bucket
///
/// Invariant: `bucket_capacity` must never be zero
bucket_capacity: AtomicUsize,
memory_usage: AtomicUsize,
max_memory_usage: AtomicUsize,
}
impl LockfreeArena {
/// Create a new Arena with the default bucket size of 4096 bytes
pub fn new(capacity: NonZeroUsize, max_memory_usage: usize) -> LassoResult<Self> {
Ok(Self {
// Allocate one bucket
buckets: AtomicBucketList::new(capacity)?,
bucket_capacity: AtomicUsize::new(capacity.get()),
// The current capacity is whatever size the bucket we just allocated is
memory_usage: AtomicUsize::new(capacity.get()),
max_memory_usage: AtomicUsize::new(max_memory_usage),
})
}
#[inline]
pub(crate) fn current_memory_usage(&self) -> usize {
self.memory_usage.load(Ordering::Relaxed)
}
#[inline]
pub(crate) fn set_max_memory_usage(&self, max_memory_usage: usize) {
self.max_memory_usage
.store(max_memory_usage, Ordering::Relaxed);
}
#[inline]
pub(crate) fn get_max_memory_usage(&self) -> usize {
self.max_memory_usage.load(Ordering::Relaxed)
}
fn set_bucket_capacity(&self, capacity: usize) {
debug_assert_ne!(capacity, 0);
self.bucket_capacity.store(capacity, Ordering::Relaxed);
}
/// Doesn't actually allocate anything, but increments `self.memory_usage` and returns `None` if
/// the attempted amount surpasses `max_memory_usage`
// TODO: Make this return a `Result`
fn allocate_memory(&self, requested_mem: usize) -> LassoResult<()> {
if self.memory_usage.load(Ordering::Relaxed) + requested_mem
> self.max_memory_usage.load(Ordering::Relaxed)
{
Err(LassoError::new(LassoErrorKind::MemoryLimitReached))
} else {
self.memory_usage
.fetch_add(requested_mem, Ordering::Relaxed);
Ok(())
}
}
/// Store a slice in the Arena, returning `None` if memory is exhausted
///
/// # Safety
///
/// The reference passed back must be dropped before the arena that created it is
///
pub unsafe fn store_str(&self, string: &str) -> LassoResult<&'static str> {
// If the string is empty, simply return an empty string.
// This ensures that only strings with lengths greater
// than zero will be allocated within the arena
if string.is_empty() {
return Ok("");
}
let slice = string.as_bytes();
debug_assert_ne!(slice.len(), 0);
// Iterate over all of the buckets within the list while attempting to find one
// that has enough space to fit our string within it
//
// This is a tradeoff between allocation speed and memory usage. As-is we prioritize
// allocation speed in exchange for potentially missing possible reuse situations
// and then allocating more memory than is strictly necessary. In practice this shouldn't
// really matter, but it's worth that the opposite tradeoff can be made by adding bounded
// retries within this loop, the worst-case performance suffers in exchange for potentially
// better memory usage.
for bucket in self.buckets.iter() {
if let Ok(start) = bucket.try_inc_length(slice.len()) {
// Safety: We now have exclusive access to `bucket[start..start + slice.len()]`
let allocated = unsafe { bucket.slice_mut(start) };
// Copy the given slice into the allocation
unsafe { allocated.copy_from_nonoverlapping(slice.as_ptr(), slice.len()) };
// Return the successfully allocated string
let string = unsafe {
str::from_utf8_unchecked(slice::from_raw_parts(allocated, slice.len()))
};
return Ok(string);
}
// Otherwise the bucket doesn't have sufficient capacity for the string
// so we carry on searching through allocated buckets
}
// If we couldn't find a pre-existing bucket with enough room in it, allocate our own bucket
let next_capacity = self.bucket_capacity.load(Ordering::Relaxed) * 2;
debug_assert_ne!(next_capacity, 0);
// If the current string's length is greater than the doubled current capacity, allocate a bucket exactly the
// size of the large string and push it back in the buckets vector. This ensures that obscenely large strings will
// not permanently affect the resource consumption of the interner
if slice.len() > next_capacity {
// Check that we haven't exhausted our memory limit
self.allocate_memory(slice.len())?;
// Safety: `len` will never be zero since we explicitly handled zero-length strings
// at the beginning of the function
let non_zero_len = unsafe { NonZeroUsize::new_unchecked(slice.len()) };
debug_assert_ne!(slice.len(), 0);
let mut bucket = AtomicBucket::with_capacity(non_zero_len)?;
// Safety: The new bucket will have exactly enough room for the string and we have
// exclusive access to the bucket since we just created it
let allocated_string = unsafe { bucket.push_slice(slice) };
self.buckets.push_front(bucket.into_ref());
Ok(allocated_string)
} else {
let memory_usage = self.current_memory_usage();
let max_memory_usage = self.get_max_memory_usage();
// If trying to use the doubled capacity will surpass our memory limit, just allocate as much as we can
if memory_usage + next_capacity > max_memory_usage {
let remaining_memory = max_memory_usage.saturating_sub(memory_usage);
// Check that we haven't exhausted our memory limit
self.allocate_memory(remaining_memory)?;
// Set the capacity to twice of what it currently is to allow for fewer allocations as more strings are interned
let mut bucket = AtomicBucket::with_capacity(
NonZeroUsize::new(remaining_memory)
.ok_or_else(|| LassoError::new(LassoErrorKind::MemoryLimitReached))?,
)?;
// Safety: The new bucket will have exactly enough room for the string and we have
// exclusive access to the bucket since we just created it
let allocated_string = unsafe { bucket.push_slice(slice) };
// TODO: Push the bucket to the back or something so that we can get it somewhat out
// of the search path, reduce the `n` in the `O(n)` list traversal
self.buckets.push_front(bucket.into_ref());
Ok(allocated_string)
// Otherwise just allocate a normal doubled bucket
} else {
// Check that we haven't exhausted our memory limit
self.allocate_memory(next_capacity)?;
// Set the capacity to twice of what it currently is to allow for fewer allocations as more strings are interned
self.set_bucket_capacity(next_capacity);
// Safety: `next_capacity` will never be zero
let capacity = unsafe { NonZeroUsize::new_unchecked(next_capacity) };
debug_assert_ne!(next_capacity, 0);
let mut bucket = AtomicBucket::with_capacity(capacity)?;
// Safety: The new bucket will have enough room for the string
let allocated_string = unsafe { bucket.push_slice(slice) };
self.buckets.push_front(bucket.into_ref());
Ok(allocated_string)
}
}
}
}
impl Default for LockfreeArena {
fn default() -> Self {
Self::new(
Capacity::default().bytes,
MemoryLimits::default().max_memory_usage,
)
.expect("failed to create default arena")
}
}
impl Debug for LockfreeArena {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
struct TotalBuckets(usize);
impl Debug for TotalBuckets {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.0 == 1 {
f.write_str("...1 bucket")
} else {
write!(f, "...{} buckets", self.0)
}
}
}
f.debug_struct("Arena")
.field("buckets", &TotalBuckets(self.buckets.len()))
.field(
"bucket_capacity",
&self.bucket_capacity.load(Ordering::Relaxed),
)
.field("memory_usage", &self.memory_usage.load(Ordering::Relaxed))
.field(
"max_memory_usage",
&self.max_memory_usage.load(Ordering::Relaxed),
)
.finish()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn string() {
let arena = LockfreeArena::default();
unsafe {
let idx = arena.store_str("test");
assert_eq!(idx, Ok("test"));
}
}
#[test]
fn empty_str() {
let arena = LockfreeArena::default();
unsafe {
let zst = arena.store_str("");
let zst1 = arena.store_str("");
let zst2 = arena.store_str("");
assert_eq!(zst, Ok(""));
assert_eq!(zst1, Ok(""));
assert_eq!(zst2, Ok(""));
}
}
#[test]
fn exponential_allocations() {
let arena = LockfreeArena::default();
let mut len = 4096;
for _ in 0..10 {
let large_string = "a".repeat(len);
let arena_string = unsafe { arena.store_str(&large_string) };
assert_eq!(arena_string, Ok(large_string.as_str()));
len *= 2;
}
}
#[test]
fn memory_exhausted() {
let arena = LockfreeArena::new(NonZeroUsize::new(10).unwrap(), 10).unwrap();
unsafe {
assert!(arena.store_str("0123456789").is_ok());
// ZSTs take up zero bytes
arena.store_str("").unwrap();
let err = arena.store_str("a").unwrap_err();
assert!(err.kind().is_memory_limit());
let err = arena.store_str("dfgsagdfgsdf").unwrap_err();
assert!(err.kind().is_memory_limit());
}
}
#[test]
fn allocate_too_much() {
let arena = LockfreeArena::new(NonZeroUsize::new(1).unwrap(), 10).unwrap();
unsafe {
let err = arena.store_str("abcdefghijklmnopqrstuvwxyz").unwrap_err();
assert!(err.kind().is_memory_limit());
}
}
#[test]
fn allocate_more_than_double() {
let arena = LockfreeArena::new(NonZeroUsize::new(1).unwrap(), 1000).unwrap();
unsafe {
assert!(arena.store_str("abcdefghijklmnopqrstuvwxyz").is_ok());
}
}
}