use crate::phoenix_error;
use std::cmp::Reverse;
use std::collections::BinaryHeap;

use crate::value::{HeapObj, HeapObjVal, Value};
use crate::vm::Global;

const DEBUG_GC: bool = false;
const DEBUG_STRESS_GC: bool = false;

const INIT_GC_THRESHOLD: usize = 200;
const MIN_SCALING_FACTOR: f32 = 0.5;
const MAX_SCALING_FACTOR: f32 = 2.0;
const SHRINK_THRESHOLD: f32 = 0.75; // Shrink if new_size < current_size * SHRINK_THRESHOLD => a value close to 1 shrinks often, a value close to 0 rarely shrinks

/// The garbage collector. Let's go
///
/// Important note to make sure I never break the GC:
/// The only way we can deallocate something we didn't mean to is if a PhoenixPointer lives somewhere other than the stack, the globals, or a reachable HeapObj
/// So be careful if we ever alloc a PhoenixClosure or a PhoenixInstance while a PhoenixPointer is floating around on the Rust stack
///   => i.e. popping a value off, calling alloc, and then doing something with that value. If that value held the last pointer to an instance, the instance will be deleted out from under it
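///
/// A minimal sketch of that failure mode (the `vm` variable and `new_closure` here are hypothetical, not this crate's API):
/// ```ignore
/// let receiver = vm.stack.pop().unwrap();            // the only PhoenixPointer to the instance now lives on the Rust stack
/// let closure = gc.alloc(new_closure, &vm.stack, &vm.globals); // a collection triggered here can free that instance
/// vm.stack.push(receiver);                            // too late: `receiver` may now point at a freed slot
/// ```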
///
/// Ideas for getting rid of placeholder values (or at least having fewer of them):
/// Steps to make it work:
/// 1. Use a linked list for instances
/// 2. When removing a value, replace the node with a placeholder
/// 3. When removing a value next to a placeholder, merge the two together and increment a counter in the placeholder
/// 4. When traversing the linked list, a placeholder is worth {counter} slots
/// 5. When allocating values, use the same queue but if it hits the middle of a placeholder, split it down the middle into two placeholders?
///   => Ideally we would not, but the free_slots queue gives us no guarantees about which slots come back when
///
/// This would get rid of most of the placeholder values, but a few things argue against it:
/// A. The memory usage of the placeholders is minimal already
/// B. Placeholders don't necessarily "leak", i.e. running the program for long enough will not cause a memory shortage unless the code itself would have used that much memory without GC'ing
/// C. Linked lists and their traversals will undoubtedly be slower than the current Vec implementation; will it even be worth it?
///
/// All in all, I think I'll need to wait until I have some code to profile.
#[derive(Debug, Clone)]
pub struct GC {
    pub instances: Vec<HeapObj>,
    /// The number of live allocations
    allocations: usize,
    /// The number of allocations allowed until we GC
    next_gc_threshold: usize,
    /// Each worklist task is an index into the instances vec for the HeapObj
    grey_worklist: Vec<usize>,
    /// A priority queue for which slots to allocate. A min-heap because we want to allocate the front slots of the instances vec first,
    /// so that the later slots (which are still filled, but only with placeholders) can be truncated in cases where a user's program allocates a large amount, drops it all, and then leaves the instances vec full of placeholders
    free_slots: BinaryHeap<Reverse<usize>>,
    // Annoying to implement because new variables will get instantiated with the wrong value, possibly allowing them to live one extra round of GC
    // unmarked: bool, // Which bool type represents an "unmarked" node
}

impl Default for GC {
    fn default() -> Self {
        Self {
            instances: Vec::new(),
            allocations: 0,
            next_gc_threshold: INIT_GC_THRESHOLD,
            grey_worklist: Vec::new(),
            free_slots: BinaryHeap::new(),
        }
    }
}

impl PartialEq for GC {
    fn eq(&self, other: &Self) -> bool {
        self.instances == other.instances
            && self.allocations == other.allocations
            && self.next_gc_threshold == other.next_gc_threshold
            && self.grey_worklist == other.grey_worklist
    }
}

impl GC {
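    /// Allocate `val` on the GC heap and return a `Value::PhoenixPointer` to its slot.
    /// May run a collection first, so every live pointer must be reachable from `stack` or `globals` when this is called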
    pub fn alloc(&mut self, val: HeapObj, stack: &[Value], globals: &[Global]) -> Value {
        if DEBUG_STRESS_GC || self.allocations >= self.next_gc_threshold {
            self.collect_garbage(stack, globals);
        }

        self.instances.push(val); // Either way we need to push the new instance; it may be moved into a free slot below
        let index = if self.free_slots.is_empty() {
            self.instances.len() - 1
        } else {
            match self.free_slots.pop() {
                Some(Reverse(index)) => {
                    // Swap the instance over to its slot and remove the placeholder that used to be there
                    let placeholder = self.instances.swap_remove(index);
                    if placeholder.obj != HeapObjVal::HeapPlaceholder {
                        phoenix_error!(
                            "VM error! GC attempted to use an allocated value as a free slot"
                        )
                    }
                    index
                }
                None => phoenix_error!("VM panic! free_slots was non-empty but pop returned None"),
            }
        };

        self.allocations += 1;
        if DEBUG_GC {
            eprintln!(
                "allocated slot {} | # of allocations = {}",
                index, self.allocations
            )
        }
        Value::PhoenixPointer(index)
    }

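    /// Mark the heap object at `index`, pushing it onto the grey worklist if it was not already marked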
    fn mark_heap_obj(&mut self, index: usize) {
        let obj_opt = self.instances.get_mut(index);
        match obj_opt {
            Some(obj) => {
                if !obj.is_marked {
                    // obj.is_marked = !self.unmarked;
                    obj.is_marked = true;
                    if DEBUG_GC {
                        eprintln!("marked {:?} at {}", obj.obj_type, index)
                    }
                    self.grey_worklist.push(index); // Only the values pointed to by PhoenixPointers (closures, instances, and lists) can contain values that might be garbage collected
                }
            }
            None => panic!("VM panic! Why is there an unallocated pointer?"),
        }
    }

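    /// Mark the heap object a value points to, if the value is a `PhoenixPointer`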
    fn mark_value(&mut self, val: &Value) {
        if let Value::PhoenixPointer(ptr) = val {
            self.mark_heap_obj(*ptr);
        }
    }

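    /// Mark the GC roots: every value on the VM stack and every initialized global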
    fn mark_roots(&mut self, stack: &[Value], globals: &[Global]) {
        for val in stack.iter() {
            self.mark_value(val);
        }

        for val in globals.iter() {
            if let Global::Init(v) = val {
                self.mark_value(v);
            }
        }
    }

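    /// Drain the grey worklist, blackening each object by marking every `PhoenixPointer` it contains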
    fn mark_grey(&mut self) {
        while !self.grey_worklist.is_empty() {
            let index = self.grey_worklist.pop().unwrap();
            let obj_opt = self.instances.get(index);
            let mut to_mark = Vec::new();

            match obj_opt {
                Some(obj) => {
                    // Blacken -> Look for PhoenixPointers that might be stored in these HeapObjs
                    if DEBUG_GC {
                        eprintln!("blackening {:?} at {}", obj.obj_type, index)
                    }
                    match &obj.obj {
                        HeapObjVal::PhoenixClosure(closure) => {
                            for val in &closure.values {
                                if let Value::PhoenixPointer(ptr) = val {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                        HeapObjVal::PhoenixInstance(instance) => {
                            for val in instance.fields.values() {
                                if let Value::PhoenixPointer(ptr) = val {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                        HeapObjVal::PhoenixList(list) => {
                            for val in list.values.iter() {
                                if let Value::PhoenixPointer(ptr) = val {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                        HeapObjVal::HeapPlaceholder => {
                            panic!("VM panic! Why do we have a valid reference to a heap placeholder value?")
                        }
                    }
                }
                None => panic!("VM panic! Why is there an unallocated pointer?"),
            }

            for ptr in to_mark.iter() {
                self.mark_heap_obj(*ptr);
            }
        }
    }

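    /// Replace every unmarked object with a placeholder and clear the mark bit on survivors.
    /// Returns the index the instances vec could be truncated to if it ends in a run of placeholders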
    fn sweep(&mut self) -> Option<usize> {
        let mut shrinkable_to: Option<usize> = None;

        for (index, obj) in self.instances.iter_mut().enumerate() {
            if obj.obj == HeapObjVal::HeapPlaceholder {
                if shrinkable_to.is_none() {
                    shrinkable_to = Some(index);
                }
            } else {
                // Since we hit a non-placeholder, we obviously can't shrink to less than the current index, so reset it to None
                shrinkable_to = None;

                // Now check if we need to free this obj
                if !obj.is_marked {
                    if DEBUG_GC {
                        eprintln!(
                            "freed slot {} | # of allocations = {} | value to free = {:?}",
                            index, self.allocations, obj,
                        )
                    }

                    let mut placeholder = HeapObj::new_placeholder(); // Create a new placeholder to swap with the obj, which will get dropped at the end of this block
                    std::mem::swap(obj, &mut placeholder);

                    self.free_slots.push(Reverse(index));
                    self.allocations -= 1;
                } else {
                    // Reset the is_marked flag for the next gc
                    obj.is_marked = false;
                }
            }
        }

        shrinkable_to
    }

    /// Shrink the instances vec to `new_size`, but only if that removes more than a threshold fraction of the current length
    fn shrink(&mut self, new_size: usize) {
        if (new_size as f32) < SHRINK_THRESHOLD * (self.instances.len() as f32) {
            if DEBUG_GC {
                eprintln!("shrinking from {} to {}", self.instances.len(), new_size)
            }
            self.instances.truncate(new_size);

            // Make sure that we remove those indices from free_slots
            let mut new_slots: BinaryHeap<Reverse<usize>> = BinaryHeap::new();
            for Reverse(i) in self.free_slots.drain() {
                if i < new_size {
                    new_slots.push(Reverse(i));
                }
            }
            self.free_slots = new_slots;
        } else if DEBUG_GC {
            eprintln!(
                "not shrinking from {} to {}",
                self.instances.len(),
                new_size
            )
        }
    }

    /// Rescale the GC threshold. Called after all garbage collections
    fn rescale_threshold(&mut self) {
        // Now we know we went from self.next_gc_threshold # of instances down to self.allocations # of instances
        // Use that difference to determine the next_gc_threshold
        let diff = self.next_gc_threshold - self.allocations;

        // 0 <= diff <= old_threshold
        // If this number is small, then we have mostly live values, and we should let the heap grow quite a bit before we try to GC again
        // => If diff = 0, double the threshold

        // If this number is large, then we should GC more often
        // => if diff = old_threshold, halve the threshold

        let old_threshold = self.next_gc_threshold as f32;
        let slope: f32 = (MIN_SCALING_FACTOR - MAX_SCALING_FACTOR) / old_threshold;
        let scaling_factor = slope * (diff as f32) + MAX_SCALING_FACTOR;
        let new_threshold = old_threshold * scaling_factor;
        self.next_gc_threshold = 1 + new_threshold as usize;

        if DEBUG_GC {
            eprintln!(
                "Scaled GC threshold from {} to {}",
                old_threshold, self.next_gc_threshold
            );
        }
    }

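    /// Run a full mark-and-sweep pass: mark the roots, trace the heap, sweep unmarked objects,
    /// shrink the instances vec if possible, and rescale the GC threshold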
    fn collect_garbage(&mut self, stack: &[Value], globals: &[Global]) {
        if DEBUG_GC {
            eprintln!("--- gc begin")
        }

        self.mark_roots(stack, globals);
        self.mark_grey();
        let shrinkable_to = self.sweep();

        if let Some(new_size) = shrinkable_to {
            self.shrink(new_size);
        }

        if DEBUG_GC {
            // The `collected` count is inaccurate if DEBUG_STRESS_GC is turned on, since collections don't wait for the threshold
            eprintln!(
                "After collection | vec_size = {} | allocations = {} | collected = {}",
                self.instances.len(),
                self.allocations,
                self.next_gc_threshold - self.allocations
            );
        }

        self.rescale_threshold();

        //self.unmarked = !self.unmarked; // Flip for the next gc run
        if DEBUG_GC {
            eprintln!("--- gc end")
        }
    }

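    /// Create a fresh GC; equivalent to `GC::default()`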
    pub fn new() -> GC {
        GC {
            grey_worklist: Vec::new(),
            instances: Vec::new(),
            free_slots: BinaryHeap::new(),
            allocations: 0,
            next_gc_threshold: INIT_GC_THRESHOLD,
        }
    }
}