1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
//! # `outsource-heap`
//! This library provides tools for outsourcing your heap allocations to various places, including:
//! - A local file
//! - A network drive
//! - Anything which implements the `Store` trait, like a `Vec<MaybeUninit<u8>>`
//!
//! Generally speaking, using this library is a bad idea.
//! Once the Rust standard library has a stable allocator API
//! this library will (hopefully) be made wholly obsolete.
//!
//! In the meantime, let's do some cursed nonsense.
//!
//! # Getting Started
//! This crate provides a global allocator, which is used to intercept all
//! allocations in your program so it can decide where to direct them.
//! Therefore, the following must be placed somewhere in your project:
//! ```
//! #[global_allocator]
//! static ALLOC: outsource_heap::Global = outsource_heap::Global::system();
//! ```
// TODO: put in specific notice that this library should only be used by
// end binaries, which are expected to control the global allocator
#![deny(unsafe_op_in_unsafe_fn)]
use ::core::{ptr::{self, NonNull}, cmp};
use ::core::cell::Cell;
use ::std::alloc::{GlobalAlloc, Layout, System};
use ::std::future::Future;
use ::std::sync::Mutex;
use ::std::collections::BTreeMap;

// We're using this inside a Mutex,
// so the Lazy doesn't need to provide its own synchronization.
use ::once_cell::unsync::Lazy;

#[cfg(feature = "file-backed")]
pub mod file_backed;

#[cfg(feature = "bounded")]
pub mod bounded;

// Note that the Sync requirement is because literally every use of Store
// provided by this crate needs the Store implementor to be Sync as references
// to it are essentially sent to and used by every thread.
// I added this requirement when adding the FileBacked Store, where the linked_list_allocator
// crate provides a non-Sync allocator.
/// This trait is a shim for the [`GlobalAlloc`] trait,
/// defined so that we can implement it for [`std`] defined types like [`Vec`].
/// See those docs for information on how to implement this trait correctly.
///
/// Future versions of this crate may allow for handling allocation failure,
/// but this is currently not supported.
pub unsafe trait Store: Sync {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8;
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout);
    /// Allocate per `layout` and zero-fill the block before returning it.
    /// Returns null on allocation failure, like [`GlobalAlloc::alloc_zeroed`].
    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
        // SAFETY: the caller upholds `alloc`'s safety contract.
        let block = unsafe { self.alloc(layout) };
        if block.is_null() {
            return block;
        }
        // SAFETY: a non-null return means `layout.size()` bytes starting at
        // `block` are valid for writes.
        unsafe { ptr::write_bytes(block, 0, layout.size()) };
        block
    }
    /// Move `ptr`'s allocation to a new block of `new_size` bytes,
    /// preserving the common prefix of the contents. On failure the
    /// original block is left untouched and null is returned.
    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
        // SAFETY: the caller guarantees `new_size` is non-zero and does not
        // overflow when rounded up to `layout.align()`; the alignment is
        // taken from an existing `Layout` and is therefore valid.
        let grown = unsafe {
            self.alloc(Layout::from_size_align_unchecked(new_size, layout.align()))
        };
        if !grown.is_null() {
            let keep = cmp::min(layout.size(), new_size);
            // SAFETY: two live allocations cannot overlap, and the caller
            // upholds the `dealloc` contract for the old block.
            unsafe {
                ptr::copy_nonoverlapping(ptr, grown, keep);
                self.dealloc(ptr, layout);
            }
        }
        grown
    }
}

// Any `Sync` implementor of `GlobalAlloc` is trivially a `Store`:
// every method forwards verbatim, including the optimized defaults.
unsafe impl<T: GlobalAlloc + Sync> Store for T {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // SAFETY: `Store::alloc` shares `GlobalAlloc::alloc`'s contract.
        unsafe { <T as GlobalAlloc>::alloc(self, layout) }
    }
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // SAFETY: `Store::dealloc` shares `GlobalAlloc::dealloc`'s contract.
        unsafe { <T as GlobalAlloc>::dealloc(self, ptr, layout) }
    }
    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
        // SAFETY: same contract; prefer the allocator's own zeroing path.
        unsafe { <T as GlobalAlloc>::alloc_zeroed(self, layout) }
    }
    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
        // SAFETY: same contract; prefer the allocator's own realloc path.
        unsafe { <T as GlobalAlloc>::realloc(self, ptr, layout, new_size) }
    }
}

// TODO: either mark this as an unsafe fn or solve the following:
// - the fact that spawning a thread escapes stores::current()
//   is problematic in its own right
/// Run a closure using a chosen global allocator.
/// Threads spawned inside this closure will not use the assigned allocator.
pub fn run<H: Store, T, F: FnOnce() -> T>(store: &'static H, task: F) -> T {
    // Swap the store in, run the task, and swap the previous hook back —
    // even if the task unwinds.
    let previous = unsafe { stores::register(store) };
    ::unwind_safe::try_eval(task).finally(|_| {
        unsafe { stores::reregister(previous) };
    })
}

/// Run a closure using a chosen global allocator that is only borrowed
/// for the duration of the call.
/// # Safety
/// Ensure that objects allocated with the provided `store`
/// do not escape the given closure.
pub unsafe fn run_borrowed<H: Store, T, F: FnOnce() -> T>(store: &H, task: F) -> T {
    // Identical to `run` minus the `'static` bound — which is exactly why
    // this is unsafe: the caller promises no allocation outlives `store`.
    let saved = unsafe { stores::register(store) };
    ::unwind_safe::try_eval(task).finally(|_| {
        unsafe { stores::reregister(saved) };
    })
}

/// Run a future using a chosen global allocator.
/// The store is registered around each `poll` and unregistered across
/// yields, so only work performed while the future is actively polled
/// is directed at `store`.
pub fn run_async<H: Store, T, F: Future<Output = T>>(store: &'static H, task: F)
    -> impl Future<Output = T>
{
    // Steps:
    // 1. register the store
    // 2. deregister the store for each yield
    // 3. reregister the store for each poll
    // 4. deregister the store for return
    use ::core::pin::Pin;
    use ::core::task::{Context, Poll};
    ::pin_project_lite::pin_project! {
        // Wrapper future carrying the store alongside the pinned inner task.
        struct Task<S: 'static, F> {
            store: &'static S,
            #[pin]
            task: F,
        }
    }
    impl<S: Store, F: Future> Future for Task<S, F> {
        type Output = F::Output;
        fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Self::Output> {
            let this = self.project();
            let task = this.task;
            let store = this.store;
            // `run` installs the store for the duration of this poll and
            // restores the previous hook on the way out, even on panic —
            // this is what implements steps 2-4 above.
            run(*store, || task.poll(ctx))
        }
    }
    Task { store, task }
}

// TODO: deal with unwind safety around:
// - run_borrowed
/// Perform a task that skips over any scoped hooks
/// for its allocations.
fn without_hooks<T, F: FnOnce() -> T>(task: F) -> T {
    // Park the current hook (if any) and put it back on the way out,
    // whether the task returns normally or unwinds.
    let parked = unsafe { stores::reregister(None) };
    ::unwind_safe::try_eval(task).finally(|_| {
        unsafe { stores::reregister(parked); }
    })
}

// This method is unsafe because it can cause an allocation to
// be deallocated with the wrong allocator.
// Primarily meant to be used inside the GlobalAlloc implementation
// for Global<T>.
/// Perform a task that skips over any scoped hooks
/// for its allocations *and* deallocations.
/// # Safety
/// Ensure that no deallocations occur which rely on a scoped [`GlobalAlloc`].
unsafe fn skip_hooks<T, F: FnOnce() -> T>(task: F) -> T {
    // Save the previous flag value and restore it afterwards instead of
    // hard-coding `false`: if `skip_hooks` ever re-enters (directly or via
    // an allocation path), the inner call must not clear the outer call's
    // skip state before the outer call has finished.
    let prev = SKIP_HOOKS.with(|cell| cell.replace(true));
    ::unwind_safe::try_eval(|| {
        without_hooks(task)
    }).finally(|_| {
        SKIP_HOOKS.with(|cell| cell.set(prev));
    })
}

mod stores {
    use ::core::ptr::NonNull;
    use ::core::cell::Cell;
    use super::Store;

    /// Lifetime-erased trait-object pointer,
    /// for use with a store which can't enforce lifetimes.
    pub(crate) type Hook = Option<NonNull<dyn Store>>;

    thread_local! {
        // The store currently hooked for this thread, if any.
        static CURRENT_TARGET: Cell<Hook> = Cell::new(None);
    }

    /// Install `store` as this thread's hook, returning whatever was
    /// installed before. Unsafe because the lifetime of `store` is erased.
    pub(crate) unsafe fn register<S: Store>(store: &S) -> Hook {
        let erased = store as *const dyn Store as *mut dyn Store;
        // SAFETY: `erased` was derived from a reference, so it is non-null.
        let hook = Some(unsafe { NonNull::new_unchecked(erased) });
        unsafe { reregister(hook) }
    }

    /// Swap in `hook` as this thread's target, returning the previous one.
    /// Unsafe because the hook carries no lifetime information.
    pub(crate) unsafe fn reregister(hook: Hook) -> Hook {
        CURRENT_TARGET.with(move |cell| cell.replace(hook))
    }

    /// The hook currently installed for this thread, if any.
    pub(crate) fn current() -> Hook {
        CURRENT_TARGET.with(Cell::get)
    }
}

thread_local! {
    // When true, Global's dealloc/realloc bypass the address-map
    // bookkeeping and go straight to the wrapped fallback allocator.
    // This is only read during GlobalAlloc::dealloc and
    // GlobalAlloc::realloc, since GlobalAlloc::alloc already has
    // something it can check (the registered hook) to decide
    // whether to skip its hooks.
    // # Safety
    // Do not write to this outside of skip_hooks.
    static SKIP_HOOKS: Cell<bool> = Cell::new(false);
}

/// A hooking global allocator: wraps a fallback allocator `T` and, when a
/// scoped [`Store`] is registered on the current thread, redirects
/// allocations to that store instead.
pub struct Global<T> {
    // Fallback allocator used whenever no hook is registered.
    alloc: T,
    // Maps each hooked allocation's address to the store that produced it,
    // so dealloc/realloc can route back to the right allocator.
    addrs: Mutex<Lazy<BTreeMap<usize, NonNull<dyn Store>>>>,
}
// SAFETY: the field blocking the auto-impl is the `NonNull<dyn Store>` map.
// `Store` requires `Sync`, so the pointed-to stores may be used from any
// thread, and the map itself is only touched under its `Mutex`. The wrapped
// allocator must itself be `Sync` — the previous unconditional impl would
// have unsoundly made `Global<T>` shareable even for non-`Sync` `T`.
unsafe impl<T: Sync> Sync for Global<T> {}
impl Global<System> {
    /// A `Global` that falls back to the operating system allocator.
    pub const fn system() -> Global<System> {
        Global {
            alloc: System,
            addrs: Mutex::new(Lazy::new(BTreeMap::new)),
        }
    }
}
impl<T> Global<T> {
    /// Wrap an arbitrary fallback allocator.
    pub const fn new(alloc: T) -> Self {
        Self {
            alloc,
            addrs: Mutex::new(Lazy::new(BTreeMap::new)),
        }
    }
}
unsafe impl<T: GlobalAlloc> GlobalAlloc for Global<T> {
    // TODO: consider using a macro to merge the declarations of
    // alloc and alloc_zeroed.
    // TODO: consider using a macro to merge the declarations of dealloc and realloc
    // TODO: maybe we can merge all of them into a macro

    /// Allocate via the thread's registered store, if any, recording the
    /// resulting address so `dealloc`/`realloc` can route back to the same
    /// store; otherwise defer to the wrapped fallback allocator.
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        match stores::current() {
            Some(hook) => {
                // Locking the Mutex may allocate,
                // and we definitely do not want to
                // walk down this path inside it, due to
                // infinite recursion woes.
                let store = unsafe { hook.as_ref() };
                // Notably, while GlobalAlloc's contract says you can't
                // unwind out of it directly, the alloc error hook set by
                // the Nightly-only set_alloc_error_hook function and called by
                // handle_alloc_error may end up being allowed to do so.
                // Therefore, we need to write this like `store.alloc` may panic.
                // (Do not place it between two things that *must* happen together.)
                let ptr = unsafe { store.alloc(layout) };
                // Record which store produced this address; skip_hooks keeps
                // the map's own allocations from re-entering this path.
                // NOTE(review): if `store.alloc` failed, this still inserts
                // key 0 (the null address) — looks harmless since null is
                // never deallocated, but worth confirming.
                unsafe { skip_hooks(|| {
                    // TODO: remove any and all possibility of this unwinding
                    let mut addrs = self.addrs.lock().unwrap();
                    addrs.insert(ptr as usize, hook);
                }) };
                ptr
            },
            None => unsafe { GlobalAlloc::alloc(&self.alloc, layout) },
        }
    }
    /// Deallocate with whichever allocator produced `ptr`: a store found in
    /// the address map, or the fallback allocator otherwise.
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // SKIP_HOOKS set means we are inside skip_hooks' bookkeeping:
        // such allocations always came from the fallback allocator.
        if SKIP_HOOKS.with(|c| c.get()) {
            unsafe { GlobalAlloc::dealloc(&self.alloc, ptr, layout) }
        } else {
            // Remove the entry while skipping hooks, so the map's own
            // (de)allocations cannot recurse back into this branch.
            let entry = unsafe { skip_hooks(|| {
                // TODO: remove any and all possibility of this unwinding
                let mut addrs = self.addrs.lock().unwrap();
                addrs.remove(&(ptr as usize))
            }) };
            match entry {
                Some(store) => {
                    let store = unsafe { store.as_ref() };
                    unsafe { store.dealloc(ptr, layout) }
                },
                None => unsafe { GlobalAlloc::dealloc(&self.alloc, ptr, layout) },
            }
        }
    }
    /// Same routing as `alloc`, but using the store's zeroing allocation path.
    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
        match stores::current() {
            Some(hook) => {
                let store = unsafe { hook.as_ref() };
                let ptr = unsafe { store.alloc_zeroed(layout) };
                // Same bookkeeping (and same null-pointer caveat) as `alloc`.
                unsafe { skip_hooks(|| {
                    // TODO: remove any and all possibility of this unwinding
                    let mut addrs = self.addrs.lock().unwrap();
                    addrs.insert(ptr as usize, hook);
                }) };
                ptr
            },
            None => unsafe { GlobalAlloc::alloc_zeroed(&self.alloc, layout) },
        }
    }
    /// Reallocate with whichever allocator produced `ptr`, mirroring `dealloc`.
    /// NOTE(review): the moved allocation's new address is not re-inserted
    /// into the map here — presumably the store's own realloc handles its
    /// bookkeeping, or a later dealloc falls through to the store-less path;
    /// verify against the Store implementations.
    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
        // realloc is a lot like dealloc, as it needs to invoke the same allocator
        // that the allocation was allocated with
        if SKIP_HOOKS.with(|c| c.get()) {
            unsafe { GlobalAlloc::realloc(&self.alloc, ptr, layout, new_size) }
        } else {
            let entry = unsafe { skip_hooks(|| {
                // TODO: remove any and all possibility of this unwinding
                let mut addrs = self.addrs.lock().unwrap();
                addrs.remove(&(ptr as usize))
            }) };
            match entry {
                Some(store) => {
                    let store = unsafe { store.as_ref() };
                    unsafe { store.realloc(ptr, layout, new_size) }
                },
                None => unsafe { GlobalAlloc::realloc(&self.alloc, ptr, layout, new_size) },
            }
        }
    }
}

#[cfg(test)]
mod tests {
    /// Smoke test: the crate compiles and the harness runs.
    #[test]
    fn it_works() {
        assert_eq!(2 + 2, 4);
    }
}