1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
//! Hulunbuir is a cross-thread garbage collector. The managed objects could be used in
//! multithreads, and collecting process may happen in any of them.
//!
//! Normally, reading or updating a managed object must lock global collector as well,
//! which significantly decrease multithread performance. However, Hulunbuir does not provide
//! common "read guard" and "write guard" interface; instead it only supports two functions:
//! `allocate` and `replace`. The first one create a managed object, and may trigger a garbage
//! collecting process if necessary; the second one replace the value of a managed object with
//! a new one provided by argument. The global collector only have to be locked during replacing
//! and the lock could be released when working thread owns the value. So the lock will not
//! become the bottleneck of performance.
//!
//! Hulunbuir also provides `Slot` as higher level abstraction and interface.
//!
//! # Basic usage
//!
//! ```
//! use hulunbuir::{Address, Collector, Keep};
//!
//! // create a managed type
//! struct ListNode(i32, Option<Address>);
//!
//! // implement Keep for it, so it could be managed
//! impl Keep for ListNode {
//!     fn with_keep<F: FnMut(&Address)>(&self, mut keep: F) {
//!         // each node keeps only its tail, so call `keep` with it...
//!         if let Some(tail) = &self.1 {
//!             // ...if the node has tail
//!             keep(tail)
//!         }
//!     }
//! }
//!
//! fn main() {
//!     // create a collector with 128 slots available
//!     let mut collector = Collector::new(128);
//!     let root = collector.allocate(ListNode(0, None)).unwrap();
//!     collector.set_root(root.clone());
//!     let tail = collector.allocate(ListNode(1, None)).unwrap();
//!     // replace root node out with something not important
//!     let mut root_node = collector.replace(&root, ListNode(42, None)).unwrap();
//!     root_node.1 = Some(tail);
//!     // replace root node back
//!     let _ = collector.replace(&root, root_node).unwrap();
//!     
//!     let _orphan = collector.allocate(ListNode(2, None)).unwrap();
//!     // before collecting...
//!     assert_eq!(collector.alive_count(), 3);
//!     collector.collect();
//!     // after collecting...
//!     assert_eq!(collector.alive_count(), 2);
//! }
//! ```
//!
//! This `replace`-based object updating strategy is suitable for simple single-thread usage.
//! The collector will work correctly **only when no garbage collection happens when any
//! "real" object is replaced out**, which means, when any of them *is* replaced out:
//! * no explicit calling to `Collector::collect`
//! * no calling to `Collector::allocate`, since it may trigger collection as well if there's
//! no slot available
//!
//! In multithreading context, none of above could be archieved since each thread has no idea
//! about what the others are doing. So more complicated strategy must be introduced. Hulunbuir
//! provides `slot` module for this purpose, but you are free to develop your own one.

/// Errors.
pub mod error;
/// Slot-based abstraction for automatic dependency caching and thread parking.
pub mod slot;

use std::collections::HashMap;
use std::mem;
use std::time::Instant;

pub use crate::error::Error;

#[macro_use]
extern crate failure_derive;

use log::info;

/// Memory manager for allocation and garbage collection.
///
/// See module level document for basic usage.
#[derive(Debug)]
pub struct Collector<T> {
    slots: HashMap<Address, Slot<T>>,
    slot_max: usize,
    next_id: usize,
    root: Option<Address>,
}

/// Virtual memory address token.
#[derive(Hash, PartialEq, Eq, Clone, Debug)]
pub struct Address(usize);

/// Required trait for managed objects' type.
pub trait Keep {
    /// When this method is called, it should calls back `keep` with the addresses of objects 
    /// that this object wishes to keep, one per calling. If current object is considered
    /// as alive in a garbage collecting pass (probably since this method is called), then
    /// all the kept objects will also be considered as alive.
    ///
    /// If this method is not implemented properly, such as not calling `keep` or calling it
    /// with insufficient addresses, `Memory::InvalidAddress` may be thrown in arbitrary time
    /// in the future.
    ///
    /// There's no reason for this method to fail. Please panic if you have to.
    fn with_keep<F: FnMut(&Address)>(&self, keep: F);
}

impl<T> Collector<T> {
    /// Create a collector with `slot_max` slots available. Each slot is able to store a managed
    /// object typed `T`.
    pub fn new(slot_max: usize) -> Self {
        Self {
            slots: HashMap::new(),
            slot_max,
            next_id: 0,
            root: None,
        }
    }

    /// Replace the value of object at `address` with `value`. Return the original value of
    /// managed object. If there's no object at `address` (maybe the object there has been
    /// collected), throw `Error::InvalidAddress`.
    pub fn replace(&mut self, address: &Address, value: T) -> Result<T, Error> {
        let slot = self.slots.get_mut(address).ok_or(Error::InvalidAddress)?;
        let content = mem::replace(&mut slot.content, value);
        Ok(content)
    }

    /// Set object at `address` as root object. Only root object and objects kept by any
    /// object that has been considered as alive object in the current collecting pass
    /// will stay alive during garbage collection.
    pub fn set_root(&mut self, address: Address) {
        self.root = Some(address);
    }

    /// Return current root object. If no root object is set, return `None`, and every object
    /// will be collected if a collecting pass is triggered.
    pub fn root(&self) -> &Option<Address> {
        &self.root
    }

    /// Return the total number of managed objects. Some of them may already be dead and will
    /// be collected in the following garbage collection.
    pub fn alive_count(&self) -> usize {
        self.slots.len()
    }
}

#[derive(Debug)]
struct Slot<T> {
    mark: bool,
    content: T,
}

impl<T: Keep> Collector<T> {
    /// Create a new managed object with `value`. If there's no available slot a garbage
    /// collecting pass will be triggered. If there's still no available slot then
    /// `Error::OutOfSlot` will be thrown. Any error thrown by collecting process
    /// will be re-thrown.
    pub fn allocate(&mut self, value: T) -> Result<Address, Error> {
        if self.slots.len() == self.slot_max {
            self.collect()?;
        }
        if self.slots.len() == self.slot_max {
            return Err(Error::OutOfSlots);
        }
        let address = Address(self.next_id);
        self.next_id += 1;
        self.slots.insert(
            address.clone(),
            Slot {
                mark: false,
                content: value,
            },
        );
        Ok(address)
    }

    /// Clean up all dead objects, which are unreachable from root object, or all objects
    /// if the root object is not set. If root object address is invalid, or any alive object
    /// keeps an object at invalid address, then `Memory::InvalidAddress` will be thrown.
    ///
    /// This method will be invoked if `Collector::allocate` is called but no slot is available,
    /// but it could also be explicit called by user. Statistics log will be printed after
    /// each collecting pass.
    pub fn collect(&mut self) -> Result<(), Error> {
        let start = Instant::now();

        let mut stack = Vec::new();
        if let Some(address) = &self.root {
            stack.push(address.to_owned());
        }
        while let Some(address) = stack.pop() {
            let slot = self.slots.get_mut(&address).ok_or(Error::InvalidAddress)?;
            if slot.mark {
                continue;
            }
            slot.mark = true;
            slot.content.with_keep(|address| {
                stack.push(address.to_owned());
            });
        }
        let mut alive_slots = HashMap::new();
        for (address, slot) in mem::replace(&mut self.slots, HashMap::new()).into_iter() {
            if slot.mark {
                alive_slots.insert(
                    address,
                    Slot {
                        mark: false,
                        content: slot.content,
                    },
                );
            }
        }
        self.slots = alive_slots;

        info!(
            target: "hulunbuir",
            "garbage collected in {} ms, {:.2}% of available slots used",
            start.elapsed().as_micros() as f32 / 1000.0,
            self.slots.len() as f32 / self.slot_max as f32 * 100.0
        );
        Ok(())
    }
}