phoenix_lang/gc.rs
use crate::phoenix_error;
use std::cmp::Reverse;
use std::collections::BinaryHeap;

use crate::value::{HeapObj, HeapObjVal, Value};
use crate::vm::Global;

const DEBUG_GC: bool = false;
const DEBUG_STRESS_GC: bool = false;

const INIT_GC_THRESHOLD: usize = 200;
const MIN_SCALING_FACTOR: f32 = 0.5;
const MAX_SCALING_FACTOR: f32 = 2.0;
const SHRINK_THRESHOLD: f32 = 0.75; // Shrink if new_size < current_size * SHRINK_THRESHOLD; closer to 1 means we shrink often, closer to 0 means we rarely shrink

/// The garbage collector. Let's go
///
/// Important note to make sure I never break the GC:
/// The only way we can deallocate something we didn't mean to is if a PhoenixPointer lives somewhere other than the stack, the globals, or a reachable HeapObj.
/// So be careful if we ever alloc a PhoenixClosure or a PhoenixInstance while a PhoenixPointer is floating around on the Rust stack,
/// => i.e. popping a value off, calling alloc, and then doing something with that value. If that value is the last pointer to an instance, the instance will be deleted out from under us.
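///
/// A hypothetical sketch of the dangerous pattern (`new_obj` and `do_something` are made-up names, not real VM code):
///
/// ```ignore
/// let val = stack.pop().unwrap();                  // last pointer to some instance
/// let ptr = gc.alloc(new_obj, &stack, &globals);   // may collect the instance `val` points to
/// do_something(val, ptr);                          // `val` now refers to a freed slot
/// ```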
///
/// Ideas for eliminating placeholder values, or at least using fewer of them:
/// Steps to make it work:
/// 1. Use a linked list for instances
/// 2. When removing a value, replace the node with a placeholder
/// 3. When removing a value next to a placeholder, merge the two together and increment a counter in the placeholder
/// 4. When traversing the linked list, a placeholder is worth {counter} slots
/// 5. When allocating values, use the same queue, but if it hits the middle of a placeholder, split it down the middle into two placeholders?
/// => Ideally we would not, but free_slots gives no guarantee about where a popped slot falls relative to a placeholder run
///
/// This would get rid of most of the placeholder values, but with a few problems:
/// * A) The memory usage of the placeholders is minimal already
/// * B) Placeholders don't necessarily "leak", i.e. running the program for long enough will not cause a memory shortage unless the code itself had used that much memory without GC'ing
/// * C) Linked lists and those traversals will undoubtedly be slower than the current Vec implementation; would it even be worth it?
///
/// All in all, I think I'll need to wait until I have some code to profile.
#[derive(Debug, Clone)]
pub struct GC {
    pub instances: Vec<HeapObj>,
    /// The number of live allocations
    allocations: usize,
    /// The number of allocations allowed until we GC
    next_gc_threshold: usize,
    /// Each worklist task is an index into the instances vec for the HeapObj
    grey_worklist: Vec<usize>,
    /// A priority queue of slots to allocate into. A min-heap, because we want to fill the front slots of the instances vec first,
    /// so that the later slots (still filled, but only with placeholders) can be truncated when a user's program allocates a large amount, drops it all, and leaves the instances vec full of placeholders
    free_slots: BinaryHeap<Reverse<usize>>,
    // Annoying to implement because new variables will get instantiated with the wrong value, possibly allowing them to live one extra round of GC
    // unmarked: bool, // Which bool value represents an "unmarked" node
}

impl Default for GC {
    fn default() -> Self {
        Self {
            instances: Vec::new(),
            allocations: 0,
            next_gc_threshold: INIT_GC_THRESHOLD,
            grey_worklist: Vec::new(),
            free_slots: BinaryHeap::new(),
        }
    }
}

impl PartialEq for GC {
    fn eq(&self, other: &Self) -> bool {
        self.instances == other.instances
            && self.allocations == other.allocations
            && self.next_gc_threshold == other.next_gc_threshold
            && self.grey_worklist == other.grey_worklist
    }
}

impl GC {
    pub fn alloc(&mut self, val: HeapObj, stack: &[Value], globals: &[Global]) -> Value {
        if DEBUG_STRESS_GC || self.allocations >= self.next_gc_threshold {
            self.collect_garbage(stack, globals);
        }

        self.instances.push(val); // Either way we need to push the new instance
        let index = if self.free_slots.is_empty() {
            self.instances.len() - 1
        } else {
            match self.free_slots.pop() {
                Some(Reverse(index)) => {
                    // swap_remove moves the just-pushed instance (currently at the end) into its slot
                    // and hands back the placeholder that used to occupy it
                    let placeholder = self.instances.swap_remove(index);
                    if placeholder.obj != HeapObjVal::HeapPlaceholder {
                        phoenix_error!(
                            "VM error! GC attempted to use an allocated value as a free slot"
                        )
                    }
                    index
                }
                None => phoenix_error!("VM panic!"),
            }
        };

        self.allocations += 1;
        if DEBUG_GC {
            eprintln!(
                "allocated slot {} | # of allocations = {}",
                index, self.allocations
            )
        }
        Value::PhoenixPointer(index)
    }
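
    // A minimal usage sketch (hypothetical; `stack`, `globals`, and the `HeapObj` values are
    // stand-ins, since real callers build them from closures/instances/lists elsewhere):
    //
    //     let mut gc = GC::new();
    //     let a = gc.alloc(some_heap_obj, &stack, &globals);  // Value::PhoenixPointer(0)
    //     let b = gc.alloc(another_obj, &stack, &globals);    // Value::PhoenixPointer(1)
    //     // Once a collection frees a slot it lands in free_slots, and because free_slots
    //     // is a min-heap over indices, the lowest free slot is reused first.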

    fn mark_heap_obj(&mut self, index: usize) {
        let obj_opt = self.instances.get_mut(index);
        match obj_opt {
            Some(obj) => {
                if !obj.is_marked {
                    // obj.is_marked = !self.unmarked;
                    obj.is_marked = true;
                    if DEBUG_GC {
                        eprintln!("marked {:?} at {}", obj.obj_type, index)
                    }
                    self.grey_worklist.push(index); // Only the values that are pointed to by PhoenixPointers (instances and closures) can contain values that might be garbage collected
                }
            }
            None => panic!("VM panic! Why is there an unallocated pointer?"),
        }
    }

    fn mark_value(&mut self, val: &Value) {
        if let Value::PhoenixPointer(ptr) = val {
            self.mark_heap_obj(*ptr);
        }
    }

    fn mark_roots(&mut self, stack: &[Value], globals: &[Global]) {
        for val in stack.iter() {
            self.mark_value(val);
        }

        for val in globals.iter() {
            if let Global::Init(v) = val {
                self.mark_value(v);
            }
        }
    }

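    // This is the usual tri-color scheme: mark_roots greys the roots (marks them and pushes them
    // onto the worklist), mark_grey blackens each grey object by greying everything it references,
    // and whatever is still unmarked (white) once the worklist drains gets reclaimed by sweep.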
    fn mark_grey(&mut self) {
        while let Some(index) = self.grey_worklist.pop() {
            let obj_opt = self.instances.get(index);
            let mut to_mark = Vec::new();

            match obj_opt {
                Some(obj) => {
                    // Blacken -> Look for PhoenixPointers that might be stored in these HeapObjs
                    if DEBUG_GC {
                        eprintln!("blackening {:?} at {}", obj.obj_type, index)
                    }
                    match &obj.obj {
                        HeapObjVal::PhoenixClosure(closure) => {
                            for val in &closure.values {
                                if let Value::PhoenixPointer(ptr) = val {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                        HeapObjVal::PhoenixInstance(instance) => {
                            for val in instance.fields.values() {
                                if let Value::PhoenixPointer(ptr) = val {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                        HeapObjVal::PhoenixList(list) => {
                            for val in list.values.iter() {
                                if let Value::PhoenixPointer(ptr) = val {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                        HeapObjVal::HeapPlaceholder => {
                            panic!("VM panic! Why do we have a valid reference to a heap placeholder value?")
                        }
                        HeapObjVal::PhoenixString(_string) => {
                            // to_mark.push(string.ptr);
                        }
                        HeapObjVal::PhoenixHashMap(map) => {
                            for val in &map.map {
                                if let Value::PhoenixPointer(ptr) = val.0 {
                                    to_mark.push(*ptr);
                                }
                                if let Value::PhoenixPointer(ptr) = val.1 {
                                    to_mark.push(*ptr);
                                }
                            }
                        }
                    }
                }
                None => panic!("VM panic! Why is there an unallocated pointer?"),
            }

            for ptr in to_mark.iter() {
                self.mark_heap_obj(*ptr);
            }
        }
    }

    fn sweep(&mut self) -> Option<usize> {
        let mut shrinkable_to: Option<usize> = None;

        for (index, obj) in self.instances.iter_mut().enumerate() {
            if obj.obj == HeapObjVal::HeapPlaceholder {
                match shrinkable_to {
                    Some(_) => {}
                    None => shrinkable_to = Some(index),
                }
            } else {
                // Since we hit a non-placeholder, we obviously can't shrink to less than the current index, so reset it to None
                shrinkable_to = None;

                // Now check if we need to free this obj
                if !obj.is_marked {
                    if DEBUG_GC {
                        eprintln!(
                            "freed slot {} | # of allocations = {} | value to free = {:?}",
                            index, self.allocations, obj,
                        )
                    }

                    let mut placeholder = HeapObj::new_placeholder(); // Create a new placeholder to swap with the obj, which will get dropped at the end of this block
                    std::mem::swap(obj, &mut placeholder);

                    self.free_slots.push(Reverse(index));
                    self.allocations -= 1;
                } else {
                    // Reset the is_marked flag for the next gc
                    obj.is_marked = false;
                }
            }
        }

        shrinkable_to
    }

    /// Shrink the instances vec as much as possible, but only if doing so removes more than a given threshold's worth of placeholders
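    ///
    /// For example, with SHRINK_THRESHOLD = 0.75 and 100 slots, a trailing run of placeholders
    /// lets us truncate only if the run starts at index 74 or earlier (i.e. new_size < 75).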
    fn shrink(&mut self, new_size: usize) {
        if (new_size as f32) < SHRINK_THRESHOLD * (self.instances.len() as f32) {
            if DEBUG_GC {
                eprintln!("shrinking from {} to {}", self.instances.len(), new_size)
            }
            self.instances.truncate(new_size);

            // Make sure that we remove those indices from free_slots
            let mut new_slots: BinaryHeap<Reverse<usize>> = BinaryHeap::new();
            for x in self.free_slots.drain() {
                match x {
                    Reverse(i) => {
                        if i < new_size {
                            new_slots.push(x)
                        }
                    }
                }
            }
            self.free_slots = new_slots;
        } else if DEBUG_GC {
            eprintln!(
                "not shrinking from {} to {}",
                self.instances.len(),
                new_size
            )
        }
    }

    /// Rescale the GC threshold. Called after every garbage collection
    fn rescale_threshold(&mut self) {
        // Now we know we went from self.next_gc_threshold # of instances down to self.allocations # of instances
        // Use that difference to determine the next_gc_threshold
        let diff = self.next_gc_threshold - self.allocations;

        // 0 <= diff <= old_threshold
        // If this number is small, then we have mostly live values, and we should let the heap grow quite a bit before we try to GC again
        // => If diff = 0, double the threshold

        // If this number is large, then we should GC more often
        // => If diff = old_threshold, halve the threshold
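
        // Worked example with the constants above (MIN_SCALING_FACTOR = 0.5, MAX_SCALING_FACTOR = 2.0)
        // and an old threshold of 200:
        //   diff = 0   -> scaling_factor = 2.0  -> new threshold = 401
        //   diff = 100 -> scaling_factor = 1.25 -> new threshold = 251
        //   diff = 200 -> scaling_factor = 0.5  -> new threshold = 101
        // (the trailing +1 below keeps the threshold at least 1 after the float-to-usize truncation)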

        let old_threshold = self.next_gc_threshold as f32;
        let slope: f32 = (MIN_SCALING_FACTOR - MAX_SCALING_FACTOR) / old_threshold;
        let scaling_factor = slope * (diff as f32) + MAX_SCALING_FACTOR;
        let new_threshold = old_threshold * scaling_factor;
        self.next_gc_threshold = 1 + new_threshold as usize;

        if DEBUG_GC {
            eprintln!(
                "Scaled GC threshold from {} to {}",
                old_threshold, self.next_gc_threshold
            );
        }
    }

    fn collect_garbage(&mut self, stack: &[Value], globals: &[Global]) {
        if DEBUG_GC {
            eprintln!("--- gc begin")
        }

        self.mark_roots(stack, globals);
        self.mark_grey();
        let shrinkable_to = self.sweep();

        if let Some(new_size) = shrinkable_to {
            self.shrink(new_size);
        }

        if DEBUG_GC {
            // The `collected` count is inaccurate if DEBUG_STRESS_GC is turned on, since collections then don't wait for the threshold
            eprintln!(
                "After collection | vec_size = {} | allocations = {} | collected = {}",
                self.instances.len(),
                self.allocations,
                self.next_gc_threshold - self.allocations
            );
        }

        self.rescale_threshold();

        //self.unmarked = !self.unmarked; // Flip for the next gc run
        if DEBUG_GC {
            dbg!(&self.instances);
            eprintln!("--- gc end")
        }
    }

    pub fn new() -> GC {
        GC {
            grey_worklist: Vec::new(),
            instances: Vec::new(),
            free_slots: BinaryHeap::new(),
            allocations: 0,
            next_gc_threshold: INIT_GC_THRESHOLD,
        }
    }
}
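
// A minimal test sketch (a hedged example, not part of the original file): it only checks the
// empty-GC invariants visible in this module; exercising alloc/collect would need a real
// HeapObj constructor (closure/instance/list), which is defined elsewhere in the crate.
#[cfg(test)]
mod gc_tests {
    use super::*;

    #[test]
    fn new_gc_starts_empty() {
        let gc = GC::new();
        assert!(gc.instances.is_empty());
        // `new` and `Default` are expected to agree
        assert_eq!(gc, GC::default());
    }
}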