use std::cmp::Reverse;
use std::collections::BinaryHeap;
use crate::phoenix_error;
use crate::value::{HeapObj, HeapObjVal, Value};
use crate::vm::Global;
const DEBUG_GC: bool = false;
const DEBUG_STRESS_GC: bool = false;
const INIT_GC_THRESHOLD: usize = 200;
const MIN_SCALING_FACTOR: f32 = 0.5;
const MAX_SCALING_FACTOR: f32 = 2.0;
const SHRINK_THRESHOLD: f32 = 0.75; // Shrink only if new_size < current_size * SHRINK_THRESHOLD. A value close to 1 means we shrink often; close to 0 means we rarely shrink
/// The garbage collector. Let's go
///
/// Important note to make sure I never break the GC:
/// The only way we can deallocate something we didn't mean to is if a PhoenixPointer exists somewhere other than the stack, the globals, or a reachable HeapObj
/// So be careful if we ever alloc a PhoenixClosure or a PhoenixInstance while a PhoenixPointer is floating around on the Rust stack
/// => i.e. popping a value off, calling alloc, and then doing something with that value. If that value held the last pointer to an instance, the instance will already have been deleted
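///
/// A hedged sketch of that hazard (the `vm` locals here are hypothetical, not a real call site):
/// ```ignore
/// let val = vm.stack.pop().unwrap();                  // `val` may hold the only PhoenixPointer to an instance
/// let ptr = vm.gc.alloc(obj, &vm.stack, &vm.globals); // a collection here can free that instance
/// vm.stack.push(val);                                 // `val` may now point at a freed slot
/// ```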
///
/// Ideas for making this not have placeholder values / fewer placeholder values:
/// Steps to making it work:
/// 1. Use a linked list for instances
/// 2. When removing a value, replace the node with a placeholder
/// 3. When removing a value next to a placeholder, merge the two together and increment a counter in the placeholder
/// 4. When traversing the linked list, a placeholder is worth {counter} slots
/// 5. When allocating values, use the same queue but if it hits the middle of a placeholder, split it down the middle into two placeholders?
/// => Ideally we would not but since the free_slots stack is not ordered in any way we don't have any guarantees
///
/// This would get rid of most of the placeholder values but with a few problems
/// A. The memory usage of the placeholders is minimal already
/// B. Placeholders don't necessarily "leak", i.e. running the program for long enough will not cause a memory shortage unless the code itself would have used that much memory without GC'ing
/// C. Linked lists and doing those traversals will undoubtedly be slower than the current Vec implementation, will it even be worth it?
///
/// All in all, I think I'll need to wait until I have some code to profile.
#[derive(Debug, Clone)]
pub struct GC {
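/// The heap itself: every live object occupies a slot; freed slots hold HeapPlaceholder values until they are reused or truncated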
pub instances: Vec<HeapObj>,
/// The number of live allocations
allocations: usize,
/// The number of allocations allowed until we GC
next_gc_threshold: usize,
/// Each worklist task is an index into the instances vec for the HeapObj
grey_worklist: Vec<usize>,
/// A priority queue of free slots to allocate into. A min-heap, because we want to fill the front slots of the instances vec first,
/// so that the later slots (which are still filled, but only with placeholders) can be truncated when a user's program allocates a large amount, drops it all, and then leaves the instances vec full of placeholders
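/// A minimal illustration of the Reverse ordering (plain std behavior, nothing project-specific):
/// ```ignore
/// let mut heap = std::collections::BinaryHeap::new();
/// heap.push(std::cmp::Reverse(3));
/// heap.push(std::cmp::Reverse(1));
/// assert_eq!(heap.pop(), Some(std::cmp::Reverse(1))); // the smallest slot index comes out first
/// ```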
free_slots: BinaryHeap<Reverse<usize>>,
// Annoying to implement because newly created objects would be instantiated with the wrong mark bit, possibly letting them live one extra round of GC
// unmarked: bool, // Which bool type represents an "unmarked" node
}
impl Default for GC {
fn default() -> Self {
Self {
instances: Vec::new(),
allocations: 0,
next_gc_threshold: INIT_GC_THRESHOLD,
grey_worklist: Vec::new(),
free_slots: BinaryHeap::new(),
}
}
}
impl PartialEq for GC {
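// Manual impl: BinaryHeap does not implement PartialEq, so free_slots is left out of the comparison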
fn eq(&self, other: &Self) -> bool {
self.instances == other.instances
&& self.allocations == other.allocations
&& self.next_gc_threshold == other.next_gc_threshold
&& self.grey_worklist == other.grey_worklist
}
}
impl GC {
pub fn alloc(&mut self, val: HeapObj, stack: &[Value], globals: &[Global]) -> Value {
if DEBUG_STRESS_GC || self.allocations >= self.next_gc_threshold {
self.collect_garbage(stack, globals);
}
self.instances.push(val); // Either way we push the new instance on the end; it may be moved into a free slot below
let index = if self.free_slots.is_empty() {
self.instances.len() - 1
} else {
match self.free_slots.pop() {
Some(Reverse(index)) => {
// swap_remove moves the instance we just pushed into its slot and hands back the placeholder that used to be there
let placeholder = self.instances.swap_remove(index);
if placeholder.obj != HeapObjVal::HeapPlaceholder {
phoenix_error!(
"VM error! GC attempted to use an allocated value as a free slot"
)
}
index
}
None => phoenix_error!("VM panic! free_slots was non-empty but pop() returned None"),
}
};
self.allocations += 1;
if DEBUG_GC {
eprintln!(
"allocated slot {} | # of allocations = {}",
index, self.allocations
)
}
Value::PhoenixPointer(index)
}
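// A hedged usage sketch from the VM side (the `vm` locals are hypothetical; the real call sites live in the VM):
//     let ptr = vm.gc.alloc(heap_obj, &vm.stack, &vm.globals);
//     vm.stack.push(ptr); // anchor the new pointer on the stack before allocating again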
fn mark_heap_obj(&mut self, index: usize) {
let obj_opt = self.instances.get_mut(index);
match obj_opt {
Some(obj) => {
if !obj.is_marked {
// obj.is_marked = !self.unmarked;
obj.is_marked = true;
if DEBUG_GC {
eprintln!("marked {:?} at {}", obj.obj_type, index)
}
self.grey_worklist.push(index); // Only the values that are pointed to by PhoenixPointers (instances and closures) can contain values that might be garbage collected
}
}
None => panic!("VM panic! Why is there an unallocated pointer?"),
}
}
fn mark_value(&mut self, val: &Value) {
if let Value::PhoenixPointer(ptr) = val {
self.mark_heap_obj(*ptr);
}
}
fn mark_roots(&mut self, stack: &[Value], globals: &[Global]) {
for val in stack.iter() {
self.mark_value(val);
}
for val in globals.iter() {
if let Global::Init(v) = val {
self.mark_value(v);
}
}
}
fn mark_grey(&mut self) {
while let Some(index) = self.grey_worklist.pop() {
let obj_opt = self.instances.get(index);
let mut to_mark = Vec::new();
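// Buffer the child pointers here: the match below holds a shared borrow of self.instances, so we can't call mark_heap_obj (which needs &mut self) until it ends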
match obj_opt {
Some(obj) => {
// Blacken -> Look for PhoenixPointers that might be stored in these HeapObjs
if DEBUG_GC {
eprintln!("blackening {:?} at {}", obj.obj_type, index)
}
match &obj.obj {
HeapObjVal::PhoenixClosure(closure) => {
for val in &closure.values {
if let Value::PhoenixPointer(ptr) = val {
to_mark.push(*ptr);
}
}
}
HeapObjVal::PhoenixInstance(instance) => {
for val in instance.fields.values() {
if let Value::PhoenixPointer(ptr) = val {
to_mark.push(*ptr);
}
}
}
HeapObjVal::PhoenixList(list) => {
for val in list.values.iter() {
if let Value::PhoenixPointer(ptr) = val {
to_mark.push(*ptr);
}
}
}
HeapObjVal::HeapPlaceholder => {
panic!("VM panic! Why do we have a valid reference to a heap placeholder value?")
}
}
}
None => panic!("VM panic! Why is there an unallocated pointer?"),
}
for ptr in to_mark.iter() {
self.mark_heap_obj(*ptr);
}
}
}
fn sweep(&mut self) -> Option<usize> {
let mut shrinkable_to: Option<usize> = None;
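// Tracks the start of the trailing run of placeholders; if the vec ends in placeholders, everything from this index onward can be truncated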
for (index, obj) in self.instances.iter_mut().enumerate() {
if obj.obj == HeapObjVal::HeapPlaceholder {
if shrinkable_to.is_none() {
shrinkable_to = Some(index);
}
} else {
// Since we hit a non-placeholder, we obviously can't shrink to less than the current index, so reset it to None
shrinkable_to = None;
// Now check if we need to free this obj
if !obj.is_marked {
if DEBUG_GC {
eprintln!(
"freed slot {} | # of allocations = {} | value to free = {:?}",
index, self.allocations, obj,
)
}
let mut placeholder = HeapObj::new_placeholder(); // Swap a fresh placeholder into the slot; the dead obj moves into `placeholder` and is dropped at the end of this block
std::mem::swap(obj, &mut placeholder);
self.free_slots.push(Reverse(index));
self.allocations -= 1;
} else {
// Reset the is_marked flag for the next gc
obj.is_marked = false;
}
}
}
shrinkable_to
}
/// Shrink the instances vec as much as possible, but only if doing so would cut it below a given fraction of its current size
fn shrink(&mut self, new_size: usize) {
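// e.g. with instances.len() == 100 and SHRINK_THRESHOLD == 0.75, we only truncate when new_size < 75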
if (new_size as f32) < SHRINK_THRESHOLD * (self.instances.len() as f32) {
if DEBUG_GC {
eprintln!("shrinking from {} to {}", self.instances.len(), new_size)
}
self.instances.truncate(new_size);
// Make sure that we remove those indices from free_slots
let mut new_slots: BinaryHeap<Reverse<usize>> = BinaryHeap::new();
for Reverse(i) in self.free_slots.drain() {
if i < new_size {
new_slots.push(Reverse(i));
}
}
self.free_slots = new_slots;
} else if DEBUG_GC {
eprintln!(
"not shrinking from {} to {}",
self.instances.len(),
new_size
)
}
}
/// Rescale the GC threshold. Called after all garbage collections
fn rescale_threshold(&mut self) {
// Now we know we went from self.next_gc_threshold # of instances down to self.allocations # of instances
// Use that difference to determine the next_gc_threshold
let diff = self.next_gc_threshold - self.allocations;
// 0 <= diff <= old_threshold
// If this number is small, then we have mostly live values, and we should let the heap grow quite a bit before we try to GC again
// => If diff = 0, double the threshold
// If this number is large, then we should GC more often
// => if diff = old_threshold, halve the threshold
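// Worked example (illustrative numbers only): old_threshold = 200, diff = 50
// slope = (0.5 - 2.0) / 200 = -0.0075
// scaling_factor = -0.0075 * 50 + 2.0 = 1.625
// new_threshold = 200 * 1.625 = 325, stored as 326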
let old_threshold = self.next_gc_threshold as f32;
let slope: f32 = (MIN_SCALING_FACTOR - MAX_SCALING_FACTOR) / old_threshold;
let scaling_factor = slope * (diff as f32) + MAX_SCALING_FACTOR;
let new_threshold = old_threshold * scaling_factor;
self.next_gc_threshold = 1 + new_threshold as usize;
if DEBUG_GC {
eprintln!(
"Scaled GC threshold from {} to {}",
old_threshold, self.next_gc_threshold
);
}
}
fn collect_garbage(&mut self, stack: &[Value], globals: &[Global]) {
if DEBUG_GC {
eprintln!("--- gc begin")
}
self.mark_roots(stack, globals);
self.mark_grey();
let shrinkable_to = self.sweep();
if let Some(new_size) = shrinkable_to {
self.shrink(new_size);
}
if DEBUG_GC {
// The # collected this round is inaccurate if DEBUG_STRESS_GC is turned on, since we don't use the threshold
eprintln!(
"After collection | vec_size = {} | allocations = {} | collected = {}",
self.instances.len(),
self.allocations,
self.next_gc_threshold - self.allocations
);
}
self.rescale_threshold();
//self.unmarked = !self.unmarked; // Flip for the next gc run
if DEBUG_GC {
eprintln!("--- gc end")
}
}
pub fn new() -> GC {
GC::default()
}
}