phoenix_lang/gc.rs

use crate::phoenix_error;
use std::cmp::Reverse;
use std::collections::BinaryHeap;

use crate::value::{HeapObj, HeapObjVal, Value};
use crate::vm::Global;

const DEBUG_GC: bool = false;
const DEBUG_STRESS_GC: bool = false;

const INIT_GC_THRESHOLD: usize = 200;
const MIN_SCALING_FACTOR: f32 = 0.5;
const MAX_SCALING_FACTOR: f32 = 2.0;
const SHRINK_THRESHOLD: f32 = 0.75; // Shrink if new_size < current_size * SHRINK_THRESHOLD => close to 1 means we shrink often, close to 0 means we rarely shrink
/// The garbage collector. Let's go
///
/// Important note to make sure I never break the GC:
/// The only way we can deallocate something we didn't mean to is if a PhoenixPointer exists somewhere other than the stack, the globals, or a reachable HeapObj.
/// So be careful if we ever alloc a PhoenixClosure or a PhoenixInstance while a PhoenixPointer is floating around on the Rust stack,
///   => i.e. popping a value off, calling alloc, and then doing something with that value. If that value was the last pointer to an instance, the instance will be deleted (see the sketch at the end of this comment).
///
/// Ideas for eliminating (or at least reducing) the placeholder values:
/// Steps to making it work:
/// 1. Use a linked list for instances
/// 2. When removing a value, replace the node with a placeholder
/// 3. When removing a value next to a placeholder, merge the two together and increment a counter in the placeholder
/// 4. When traversing the linked list, a placeholder is worth {counter} slots
/// 5. When allocating values, use the same queue, but if it hits the middle of a placeholder, split it down the middle into two placeholders?
///   => Ideally we would not, but since the free_slots stack is not ordered in any way we don't have any guarantees
///
/// This would get rid of most of the placeholder values, but there are a few problems:
/// * A) The memory usage of the placeholders is minimal already
/// * B) Placeholders don't necessarily "leak", i.e. running the program for long enough will not cause a memory shortage unless the code itself would have used that much memory without GC'ing
/// * C) Linked lists and their traversals will undoubtedly be slower than the current Vec implementation, so will it even be worth it?
///
/// All in all, I think I'll need to wait until I have some code to profile.
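///
/// A minimal sketch of that hazard (hypothetical VM call site; `new_obj` and `do_something` are stand-ins, not real identifiers):
/// ```ignore
/// // BAD: `val` may hold the only PhoenixPointer to an instance. It now lives only on the
/// // Rust stack, so a collection triggered inside `alloc` cannot see it as a root.
/// let val = stack.pop().unwrap();
/// let ptr = gc.alloc(new_obj, &stack, &globals); // may free the instance `val` points to
/// do_something(val, ptr);                        // `val` may now point at a HeapPlaceholder
///
/// // SAFER: allocate first (or keep the value on the VM stack) so it stays reachable as a root.
/// let ptr = gc.alloc(new_obj, &stack, &globals);
/// let val = stack.pop().unwrap();
/// do_something(val, ptr);
/// ```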
#[derive(Debug, Clone)]
pub struct GC {
    pub instances: Vec<HeapObj>,
    /// The number of live allocations
    allocations: usize,
    /// The number of allocations allowed until we GC
    next_gc_threshold: usize,
    /// Each worklist task is an index into the instances vec for the HeapObj
    grey_worklist: Vec<usize>,
    /// A priority queue of free slots to allocate into. A min-heap (via `Reverse`) because we want to fill the front slots of the instances vec first,
    /// so that the later slots (still filled, but only with placeholders) can be truncated when a user's program allocates a large amount, drops it all, and then leaves the instances vec full of placeholders.
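    ///
    /// A tiny sketch (standalone, illustrative values) of why `Reverse` turns the max-heap `BinaryHeap` into a min-heap here:
    /// ```ignore
    /// let mut slots: BinaryHeap<Reverse<usize>> = BinaryHeap::new();
    /// slots.push(Reverse(7));
    /// slots.push(Reverse(2));
    /// assert_eq!(slots.pop(), Some(Reverse(2))); // lowest index comes out first
    /// ```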
    free_slots: BinaryHeap<Reverse<usize>>,
    // Annoying to implement because new variables will get instantiated with the wrong value, possibly allowing them to live one extra round of GC
    // unmarked: bool, // Which bool value represents an "unmarked" node
}

impl Default for GC {
    fn default() -> Self {
        Self {
            instances: Vec::new(),
            allocations: 0,
            next_gc_threshold: INIT_GC_THRESHOLD,
            grey_worklist: Vec::new(),
            free_slots: BinaryHeap::new(),
        }
    }
}

impl PartialEq for GC {
    fn eq(&self, other: &Self) -> bool {
        self.instances == other.instances
            && self.allocations == other.allocations
            && self.next_gc_threshold == other.next_gc_threshold
            && self.grey_worklist == other.grey_worklist
    }
}

impl GC {
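    /// Allocate `val` on the GC heap and return a `Value::PhoenixPointer` to its slot,
    /// reusing the lowest free (placeholder) slot if one exists. This may trigger a
    /// collection first, so `stack` and `globals` must contain every live root.
    ///
    /// Rough usage sketch (hypothetical call site; `new_obj`, `vm.stack`, and `vm.globals` are stand-ins):
    /// ```ignore
    /// let ptr = gc.alloc(new_obj, &vm.stack, &vm.globals);
    /// if let Value::PhoenixPointer(slot) = ptr {
    ///     // `slot` is an index into gc.instances
    /// }
    /// ```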
    pub fn alloc(&mut self, val: HeapObj, stack: &[Value], globals: &[Global]) -> Value {
        if DEBUG_STRESS_GC || self.allocations >= self.next_gc_threshold {
            self.collect_garbage(stack, globals);
        }

        self.instances.push(val); // Either way we need to push on the new instance
        let index = if self.free_slots.is_empty() {
            self.instances.len() - 1
        } else {
            match self.free_slots.pop() {
                Some(Reverse(index)) => {
                    // Swap the instance over to its slot and remove the placeholder that used to be there
                    let placeholder = self.instances.swap_remove(index);
                    if placeholder.obj != HeapObjVal::HeapPlaceholder {
                        phoenix_error!(
                            "VM error! GC attempted to use an allocated value as a free slot"
                        )
                    }
                    index
                }
                None => phoenix_error!("VM panic!"),
            }
        };

        self.allocations += 1;
        if DEBUG_GC {
            eprintln!(
                "allocated slot {} | # of allocations = {}",
                index, self.allocations
            )
        }
        Value::PhoenixPointer(index)
    }

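    /// Mark phase helper: mark the object at `index` (if not already marked) and push it onto
    /// the grey worklist so its children get traced by `mark_grey`.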
    fn mark_heap_obj(&mut self, index: usize) {
        let obj_opt = self.instances.get_mut(index);
        match obj_opt {
            Some(obj) => {
                if !obj.is_marked {
                    // obj.is_marked = !self.unmarked;
                    obj.is_marked = true;
                    if DEBUG_GC {
                        eprintln!("marked {:?} at {}", obj.obj_type, index)
                    }
                    self.grey_worklist.push(index); // Only the values that are pointed to by PhoenixPointers (instances and closures) can contain values that might be garbage collected
                }
            }
            None => panic!("VM panic! Why is there an unallocated pointer?"),
        }
    }

    fn mark_value(&mut self, val: &Value) {
        if let Value::PhoenixPointer(ptr) = val {
            self.mark_heap_obj(*ptr);
        }
    }

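    /// Mark the GC roots: every value on the VM stack plus every initialized global.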
    fn mark_roots(&mut self, stack: &[Value], globals: &[Global]) {
        for val in stack.iter() {
            self.mark_value(val);
        }

        for val in globals.iter() {
            if let Global::Init(v) = val {
                self.mark_value(v);
            }
        }
    }

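    /// Trace (blacken) objects on the grey worklist, marking any PhoenixPointers they contain
    /// (closure upvalues, instance fields, list elements, hashmap keys and values).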
    fn mark_grey(&mut self) {
        while let Some(index) = self.grey_worklist.pop() {
            let obj_opt = self.instances.get(index);
            let mut to_mark = Vec::new();

            match obj_opt {
                Some(obj) => {
                    // Blacken -> Look for PhoenixPointers that might be stored in these HeapObjs
                    if DEBUG_GC {
                        eprintln!("blackening {:?} at {}", obj.obj_type, index)
                    }
                    match &obj.obj {
                        HeapObjVal::PhoenixClosure(closure) => {
                            for val in &closure.values {
                                if let Value::PhoenixPointer(ptr) = val {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                        HeapObjVal::PhoenixInstance(instance) => {
                            for val in instance.fields.values() {
                                if let Value::PhoenixPointer(ptr) = val {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                        HeapObjVal::PhoenixList(list) => {
                            for val in list.values.iter() {
                                if let Value::PhoenixPointer(ptr) = val {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                        HeapObjVal::HeapPlaceholder => {
                            panic!("VM panic! Why do we have a valid reference to a heap placeholder value?")
                        }
                        HeapObjVal::PhoenixString(_string) => {
                            // to_mark.push(string.ptr);
                        }
                        HeapObjVal::PhoenixHashMap(map) => {
                            for val in &map.map {
                                if let Value::PhoenixPointer(ptr) = val.0 {
                                    to_mark.push(*ptr);
                                }
                                if let Value::PhoenixPointer(ptr) = val.1 {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                    }
                }
                None => panic!("VM panic! Why is there an unallocated pointer?"),
            }

            for ptr in to_mark.iter() {
                self.mark_heap_obj(*ptr);
            }
        }
    }

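    /// Sweep phase: swap every unmarked object with a placeholder, record the freed slot in
    /// `free_slots`, and clear marks on survivors. Returns the index of the first placeholder
    /// in the trailing run of placeholders (i.e. the length we could truncate `instances` to),
    /// or `None` if the vec does not end in placeholders.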
    fn sweep(&mut self) -> Option<usize> {
        let mut shrinkable_to: Option<usize> = None;

        for (index, obj) in self.instances.iter_mut().enumerate() {
            if obj.obj == HeapObjVal::HeapPlaceholder {
                if shrinkable_to.is_none() {
                    shrinkable_to = Some(index);
                }
            } else {
                // Since we hit a non-placeholder, we obviously can't shrink to less than the current index, so reset it to None
                shrinkable_to = None;

                // Now check if we need to free this obj
                if !obj.is_marked {
                    if DEBUG_GC {
                        eprintln!(
                            "freed slot {} | # of allocations = {} | value to free = {:?}",
                            index, self.allocations, obj,
                        )
                    }

                    let mut placeholder = HeapObj::new_placeholder(); // Swap a fresh placeholder into the slot; the old obj is dropped at the end of this block
                    std::mem::swap(obj, &mut placeholder);

                    self.free_slots.push(Reverse(index));
                    self.allocations -= 1;
                } else {
                    // Reset the is_marked flag for the next gc
                    obj.is_marked = false;
                }
            }
        }

        shrinkable_to
    }

    /// Shrink the instances vec as much as possible, but only if doing so removes at least a threshold fraction of it (see `SHRINK_THRESHOLD`).
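    /// (e.g. with `SHRINK_THRESHOLD = 0.75`, an `instances` vec of length 100 is only truncated when the requested `new_size` is below 75.)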
    fn shrink(&mut self, new_size: usize) {
        if (new_size as f32) < SHRINK_THRESHOLD * (self.instances.len() as f32) {
            if DEBUG_GC {
                eprintln!("shrinking from {} to {}", self.instances.len(), new_size)
            }
            self.instances.truncate(new_size);

            // Make sure we also drop the truncated indices from free_slots
            let new_slots: BinaryHeap<Reverse<usize>> = self
                .free_slots
                .drain()
                .filter(|&Reverse(i)| i < new_size)
                .collect();
            self.free_slots = new_slots;
        } else if DEBUG_GC {
            eprintln!(
                "not shrinking from {} to {}",
                self.instances.len(),
                new_size
            )
        }
    }

    /// Rescale the GC threshold. Called after every garbage collection
    fn rescale_threshold(&mut self) {
        // We know we went from self.next_gc_threshold instances down to self.allocations instances.
        // Use that difference to determine the next threshold.
        let diff = self.next_gc_threshold - self.allocations;

        // 0 <= diff <= old_threshold
        // If diff is small, then we have mostly live values, and we should let the heap grow quite a bit before we try to GC again
        // => If diff = 0, double the threshold

        // If diff is large, then we should GC more often
        // => If diff = old_threshold, halve the threshold
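        // Worked example with the constants above (MIN = 0.5, MAX = 2.0) and old_threshold = 200:
        //   diff =   0 -> factor = 2.00 -> next_gc_threshold = 1 + 400 = 401
        //   diff = 100 -> factor = 1.25 -> next_gc_threshold = 1 + 250 = 251
        //   diff = 200 -> factor = 0.50 -> next_gc_threshold = 1 + 100 = 101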

        let old_threshold = self.next_gc_threshold as f32;
        let slope: f32 = (MIN_SCALING_FACTOR - MAX_SCALING_FACTOR) / old_threshold;
        let scaling_factor = slope * (diff as f32) + MAX_SCALING_FACTOR;
        let new_threshold = old_threshold * scaling_factor;
        self.next_gc_threshold = 1 + new_threshold as usize;

        if DEBUG_GC {
            eprintln!(
                "Scaled GC threshold from {} to {}",
                old_threshold, self.next_gc_threshold
            );
        }
    }

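    /// Run a full mark-and-sweep collection: mark roots (stack + initialized globals), trace the
    /// grey worklist, sweep unmarked objects into placeholders, shrink the tail of `instances`
    /// if enough trailing placeholders accumulated, then rescale the allocation threshold.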
    fn collect_garbage(&mut self, stack: &[Value], globals: &[Global]) {
        if DEBUG_GC {
            eprintln!("--- gc begin")
        }

        self.mark_roots(stack, globals);
        self.mark_grey();
        let shrinkable_to = self.sweep();

        if let Some(new_size) = shrinkable_to {
            self.shrink(new_size);
        }

        if DEBUG_GC {
            // The `collected` count is inaccurate if DEBUG_STRESS_GC is turned on, since collections don't wait for the threshold
            eprintln!(
                "After collection | vec_size = {} | allocations = {} | collected = {}",
                self.instances.len(),
                self.allocations,
                self.next_gc_threshold - self.allocations
            );
        }

        self.rescale_threshold();

        // self.unmarked = !self.unmarked; // Flip for the next gc run
        if DEBUG_GC {
            dbg!(&self.instances);
            eprintln!("--- gc end")
        }
    }

    pub fn new() -> GC {
        GC::default()
    }
}