1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
//! GC heap allocator.
//!
//! ### Safety
//!
//! First, a few general observations about safety in Rust.
//!
//! *   If a `mut` reference to a value exists, and other references to the value
//!     also exist, we can definitely crash.
//!
//!     One way it can happen is that the `mut` value is or contains an
//!     `enum`. Using the other reference, we can borrow a refrence to a
//!     current field of the `enum`. Then, using the `mut` reference, we can
//!     assign some other variant to the `enum`, invalidating the reference.
//!     Rust does not detect that the reference is invalid, so when we use it,
//!     we are accessing gibberish.
//!
//!     Another way is if a callback is called while we're in the middle of
//!     mutating a value, while it's in an invalid state. When you read
//!     "callback", think of `deref` and `drop` methods, which are called
//!     implicitly all over the place. These callbacks are normally safe,
//!     because they simply can't have a reference to the value that's in an
//!     intermediate state. But if another reference exists, it might be used
//!     to try to read from the half-mutated value.
//!
//! *   If a data structure contains two paths to a value, under Rust's usual
//!     rules, you can get two `mut` references to that value.
//!
//!     This is why you tend to build ownership trees in Rust and it's why GC
//!     is particularly challenging in Rust: GCs build an arbitrary graph of
//!     values and references.
//!
//! This GC takes the following approach to ensure safety.
//!
//! *   Minimize access to values stored in the heap. For the most part,
//!     application code *never* sees a direct reference to any value that is
//!     physically stored someplace where it's subject to GC.
//!
//! *   Minimize the times when direct references to in-heap values exist at
//!     all, and during these operations, prevent control from escaping to
//!     arbitrary application code.
//!
//! *   Ensure that when any direct references to in-heap values exist, they
//!     obey Rust's rules: for any given value, either only non-`mut`
//!     references, or at most one `mut` reference, exists at a time.
//!
//! Thus we are particularly keen to avoid the possibility of "reentering" the
//! heap, creating new references to in-heap values while others already exist.
//!
//! References to heap values therefore exist only during the following
//! operations:
//!
//! *   Allocation - That is, moving values into the heap. This is safe because
//!     it never triggers any user code at all while heap references exist.
//!
//! * - Heap reads and writes - The only way to do these is via macro-generated
//!     accessors which do not expose references.  Reads call `from_heap()` on
//!     in-heap values, which is dangerous because `from_heap()` receives a
//!     direct reference.  Writes call `drop()`, which is even more dangerous:
//!     (1) it receives a direct `mut` reference; and (2) it leaves in-heap
//!     values uninitialized.
//!
//! *   GC marking - The code for this is all macro-generated.
//!
//! *   GC sweeping - This calls `drop()`, which is dangerous for the reasons
//!     noted above.
//!
//! To make this scheme safe, `from_heap()` and `drop()` must be tightly controlled.
//! `from_heap()` is therefore in an unsafe trait; users are expected to use
//! the `gc_heap_type!` to autogenerate instances.
//!
//! However, **we leave it up to the user to exercise care with `drop()`.**
//! We suggest *never* implementing `Drop` for a heap type. If you must,
//! avoid reading pointer fields while dropping, and avoid calling into
//! arbitrary code.

use std::cell::RefCell;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::ptr;
use traits::IntoHeapAllocation;
use pages::{heap_type_id, TypedPage, PageSet, PageSetRef};
use ptr::{Pointer, UntypedPointer};
use gcref::GcRef;
use marking::{mark, MarkingTracer};

pub struct Heap {
    page_sets: HashMap<usize, PageSet>,
    pins: RefCell<HashMap<UntypedPointer, usize>>,
    marking_tracer: Option<MarkingTracer>,
}

// What does this do? You'll never guess!
pub type HeapSessionId<'h> = PhantomData<::std::cell::Cell<&'h mut ()>>;

pub struct HeapSession<'h> {
    id: HeapSessionId<'h>,
    heap: &'h mut Heap,
}

/// Create a heap, pass it to a callback, then destroy the heap.
///
/// The heap's lifetime is directly tied to this function call, for safety. (So
/// the API is a little wonky --- but how many heaps were you planning on
/// creating?)
pub fn with_heap<R, F>(f: F) -> R
where
    F: for<'h> FnOnce(&mut HeapSession<'h>) -> R,
{
    Heap::new().enter(f)
}

impl Heap {
    /// Create a new, empty heap.
    pub fn new() -> Heap {
        Heap {
            page_sets: HashMap::new(),
            pins: RefCell::new(HashMap::new()),
            marking_tracer: Some(MarkingTracer::default()),
        }
    }

    /// Run some code using this Heap.
    ///
    /// # Example
    ///
    ///     use cell_gc::{Heap, GCLeaf};
    ///
    ///     let mut heap = Heap::new();
    ///     heap.enter(|hs| {
    ///         // ... hs.alloc(MyHeapStruct { ... }) ...
    ///         # hs.force_gc();
    ///     });
    ///
    pub fn enter<R, F>(&mut self, f: F) -> R
    where
        F: for<'h> FnOnce(&mut HeapSession<'h>) -> R,
    {
        f(&mut HeapSession::new(self))
    }

    /// Add the value `*p` to the root set, protecting it from GC.
    ///
    /// A value that has been pinned *n* times stays in the root set
    /// until it has been unpinned *n* times.
    ///
    /// # Safety
    ///
    /// `p` must point to a live allocation of type `T` in this heap.
    pub unsafe fn pin<'h, T: IntoHeapAllocation<'h>>(&self, p: Pointer<T::In>) {
        let mut pins = self.pins.borrow_mut();
        let entry = pins.entry(p.into()).or_insert(0);
        *entry += 1;
    }

    /// Unpin a heap-allocated value (see `pin`).
    ///
    /// # Safety
    ///
    /// `p` must point to a pinned allocation of type `T` in this heap.
    pub unsafe fn unpin<'h, T: IntoHeapAllocation<'h>>(&self, p: Pointer<T::In>) {
        let mut pins = self.pins.borrow_mut();
        let done = {
            let entry = pins.entry(p.into()).or_insert(0);
            assert!(*entry != 0);
            *entry -= 1;
            *entry == 0
        };
        if done {
            pins.remove(&p.into());
        }
    }

    /// Call the given function on each pinned root.
    pub fn each_pin<F>(&self, mut f: F)
    where
        F: FnMut(UntypedPointer),
    {
        for (&ptr, _) in self.pins.borrow().iter() {
            f(ptr);
        }
    }

    pub unsafe fn from_allocation<'h, T: IntoHeapAllocation<'h>>(ptr: Pointer<T::In>) -> *const Heap {
        (*TypedPage::find(ptr)).header.heap
    }

    pub unsafe fn get_mark_bit<'h, T: IntoHeapAllocation<'h>>(ptr: Pointer<T::In>) -> bool {
        (*TypedPage::find(ptr)).get_mark_bit(ptr)
    }

    pub unsafe fn set_mark_bit<'h, T: IntoHeapAllocation<'h>>(ptr: Pointer<T::In>) {
        (*TypedPage::find(ptr)).set_mark_bit(ptr);
    }

    fn take_marking_tracer(&mut self) -> MarkingTracer {
        self.marking_tracer.take().unwrap()
    }

    fn replace_marking_tracer(&mut self, tracer: MarkingTracer) {
        assert!(self.marking_tracer.is_none());
        assert!(tracer.mark_stack_is_empty());
        self.marking_tracer = Some(tracer);
    }

    /// Run the given function with the marking tracer.
    ///
    /// The marking tracer is taken out of the heap and replaced again so we can
    /// have two independent borrows of the heap and the marking tracer and the
    /// same time.
    pub(crate) fn with_marking_tracer<F, O>(&mut self, mut f: F) -> O
    where
        F: FnMut(&mut Self, &mut MarkingTracer) -> O,
    {
        let mut tracer = self.take_marking_tracer();
        let retval = f(self, &mut tracer);
        self.replace_marking_tracer(tracer);
        retval
    }

    /// Clear all mark bits in preparation for GC.
    ///
    /// # Safety
    ///
    /// This must be called only at the beginning of a GC cycle.
    pub(crate) unsafe fn clear_mark_bits(&mut self) {
        for page_set in self.page_sets.values_mut() {
            page_set.clear_mark_bits();
        }
    }

    fn gc(&mut self) {
        mark(self);

        // sweep phase
        for page_set in self.page_sets.values_mut() {
            unsafe {
                page_set.sweep();
            }
        }
    }
}

impl Drop for Heap {
    fn drop(&mut self) {
        // Perform a final GC to call destructors on any remaining allocations.
        assert!(self.pins.borrow().is_empty());
        self.gc();

        for page_set in self.page_sets.values() {
            page_set.assert_no_allocations();
        }
    }
}

impl<'h> HeapSession<'h> {
    fn new(heap: &'h mut Heap) -> HeapSession<'h> {
        HeapSession {
            id: PhantomData,
            heap,
        }
    }

    fn get_page_set<'a, T: IntoHeapAllocation<'h> + 'a>(&'a mut self) -> PageSetRef<'a, 'h, T> {
        let key = heap_type_id::<T>();
        let heap: *mut Heap = self.heap;
        let page_ref =
            self.heap
            .page_sets
            .entry(key)
            .or_insert_with(|| unsafe { PageSet::new(heap) });
        unsafe { page_ref.unchecked_downcast_mut() }
    }

    pub fn set_page_limit<T: IntoHeapAllocation<'h>>(&mut self, limit: Option<usize>) {
        self.get_page_set::<T>().set_page_limit(limit);
    }

    pub fn try_alloc<T: IntoHeapAllocation<'h>>(&mut self, value: T) -> Option<T::Ref> {
        // For now, this is done very early, so that if it panics, the heap is
        // left in an OK state. Better wrapping of raw pointers would make it
        // possible to do this later, closer to the `ptr::write()` call. (And
        // the compiler might optimize away this temporary if we do it that
        // way.) Looking forward to placement new!
        let u = value.into_heap();
        unsafe {
            let alloc = self.get_page_set::<T>().try_alloc();
            alloc
                .or_else(|| {
                    self.heap.gc();
                    self.get_page_set::<T>().try_alloc()
                })
                .map(move |p| {
                    ptr::write(p.as_raw() as *mut _, u);
                    T::wrap_gcref(GcRef::new(p))
                })
        }
    }

    pub fn alloc<T: IntoHeapAllocation<'h>>(&mut self, value: T) -> T::Ref {
        self.try_alloc(value)
            .expect("out of memory (gc did not collect anything)")
    }

    pub fn force_gc(&mut self) {
        self.heap.gc();
    }
}