1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
use datasize::DataSize;
use super::graph::{StartIdx, SuccIdx};
/// A container of prefix paths, stored as a linked-list style tree such that common prefixes are
/// only stored once.
#[derive(Debug)]
pub(super) struct Paths {
/// Flat vector of [`PathNode`]s, stored compactly as 64-bit numbers. The bits are used as
/// follows:
///
/// | Bits | [`PathNode::Start`] | [`PathNode::Cons`] | [`PathNode::Empty`] |
/// |----------|---------------------|----------------------|---------------------|
/// | ` 0..32` | zeros | `last` ([`PathId`]) | `next` ([`PathId`]) |
/// | `32..63` | [`StartIdx`] | `succ` ([`SuccIdx`]) | zeros |
/// | `63..64` | zero | zero | one |
nodes: index_vec::IndexVec<PathId, u64>,
/// The first `num_starts` elements of `nodes` are all [`PathNode::Start`]s
num_starts: usize,
/// The index of an [`PathNode::Empty`] node. The empty nodes form a linked list, of which
/// this is the head.
first_empty: PathId,
/// Number of non-[`PathNode::Empty`] nodes
size: usize,
}
#[derive(Debug)]
enum PathNode {
/// This node has no predecessors and is the start of a path
Start(StartIdx),
/// This node extends the `PathNode` at the `usize` index by taking the [`SuccIdx`]th successor
/// of the currently reached node.
Cons { last: PathId, succ: SuccIdx },
/// This slot is empty and the next empty slot is at `next`
Empty { next: PathId },
}
impl Paths {
/// Crates an empty set of [`Paths`].
pub(super) fn new() -> Self {
Self {
nodes: index_vec::IndexVec::new(),
num_starts: 0,
first_empty: NULL_NODE_IDX, // No empty nodes in the linked list
size: 0,
}
}
/// Add a new [`PathNode::Start`].
///
/// # Panics
///
/// Panics if any non-start nodes have been `add`ed
pub(super) fn add_start(&mut self, start_idx: StartIdx) -> PathId {
assert_eq!(
self.nodes.len(),
self.num_starts,
"`add_start` called after other nodes were `add`ed."
);
self.size += 1;
self.num_starts += 1;
self.nodes.push(as_high_31_bits(start_idx.raw()))
}
/// Adds a new [`PathNode::Cons`] to this set of paths, returning its [`PathId`].
pub(super) fn add(&mut self, last: PathId, succ: SuccIdx) -> PathId {
// Generate the node data
let mut node_data = last.raw() as u64; // Put `last` into the low 32 bits
node_data |= as_high_31_bits(succ.raw()); // Put `succ` into the high 31 bits. Note that
// this also leaves the 'empty' bit still as `0`
self.size += 1; // It doesn't matter how the new node is added, the size always increases
if self.first_empty == NULL_NODE_IDX {
// No more empty nodes, so push to the vector
self.nodes.push(node_data)
} else {
// Get the index of the first empty node, and remove it from the head of the empty list
let empty_idx = self.first_empty;
let next_empty = match self.get(empty_idx) {
PathNode::Empty { next } => next,
_ => panic!("`next_empty` pointed to a non-empty node"),
};
self.first_empty = next_empty;
// Replace the empty node with the new node
self.nodes[empty_idx] = node_data;
empty_idx
}
}
/// Run a garbage collection pass to mark any unreachable nodes as 'empty'
pub(super) fn gc(&mut self, heads: impl IntoIterator<Item = PathId>) {
// Mark which nodes are reachable, by marking all the non-start nodes as 'empty' then
// marking any node reachable from `heads` as non-empty. Start nodes cannot be GCed.
for node_data in self.nodes.iter_mut().skip(self.num_starts) {
*node_data |= EMPTY_BIT_MASK;
}
for head in heads {
self.mark_path_as_non_empty(head);
}
// Rebuild the empty node list and count the number of non-empty nodes
self.size = self.nodes.len();
self.first_empty = NULL_NODE_IDX;
for (idx, node_data) in self.nodes.iter_mut_enumerated().skip(self.num_starts) {
let is_empty = *node_data & EMPTY_BIT_MASK != 0;
if is_empty {
// Push this node to the `empty` list
*node_data = EMPTY_BIT_MASK | (self.first_empty.raw() as u64);
self.first_empty = idx;
self.size -= 1;
}
}
}
/// Mark this [`PathId`] and everything it reaches as 'non-empty'
fn mark_path_as_non_empty(&mut self, head: PathId) {
// Skip this if we've already marked this node (and its descendants) as reachable
if self.nodes[head] & EMPTY_BIT_MASK == 0 {
return;
}
// Set this node's 'empty' bit to `0` (it would have been set to `1` at the start of the GC
// pass)
self.nodes[head] &= !EMPTY_BIT_MASK;
match self.get(head) {
PathNode::Start(_) => {} // We've finished this linked list
PathNode::Cons { last, succ: _ } => self.mark_path_as_non_empty(last),
PathNode::Empty { .. } => unreachable!("We just set the 'empty' bit to 0"),
}
}
/// Return the path which finishes at a given [`PathId`].
pub(super) fn flatten(&self, node_idx: PathId) -> (StartIdx, Vec<SuccIdx>) {
let mut succs = Vec::new();
let start_idx = self.flatten_recursive(node_idx, &mut succs);
(start_idx, succs)
}
fn flatten_recursive(&self, node_idx: PathId, succs: &mut Vec<SuccIdx>) -> StartIdx {
match self.get(node_idx) {
PathNode::Start(start_idx) => start_idx,
PathNode::Cons { last, succ } => {
let start_idx = self.flatten_recursive(last, succs);
succs.push(succ);
start_idx
}
PathNode::Empty { .. } => panic!("GCed nodes shouldn't be in a path"),
}
}
/// Gets the [`PathNode`] at a given [`PathId`].
fn get(&self, idx: PathId) -> PathNode {
let data = self.nodes[idx];
// Deconstruct the 64-bit value
let is_empty = (data & EMPTY_BIT_MASK) != 0; // Node is `Empty` iff `EMPTY_BIT` is 1
let is_start = idx.index() < self.num_starts; // Node is `Start` if it's within `num_starts`
let low_32_bits = PathId::from_raw(data as u32); // Read bits `0..32`
let high_31_bits = (data >> 32) as u32 & 0x7FFF_FFFF; // Read bits `32..63`
// Construct node
if is_start {
assert!(!is_empty); // Start nodes shouldn't be GCed
PathNode::Start(StartIdx::from_raw(high_31_bits))
} else if is_empty {
PathNode::Empty { next: low_32_bits }
} else {
PathNode::Cons {
last: low_32_bits,
succ: SuccIdx::from_raw(high_31_bits),
}
}
}
#[allow(dead_code)]
fn debug_nodes(&self) {
for (idx, data) in self.nodes.iter_enumerated() {
let empty_bit = data >> 63;
let high_31_bits = (data >> 32) & 0x7FFF_FFFF;
let low_32_bits = *data as u32;
println!(
"{:>4?}: {} {:031b} {:032b} -> {:?}",
idx.raw(),
empty_bit,
high_31_bits,
low_32_bits,
self.get(idx)
);
}
}
}
impl DataSize for Paths {
const IS_DYNAMIC: bool = true;
const STATIC_HEAP_SIZE: usize = 0;
fn estimate_heap_size(&self) -> usize {
std::mem::size_of::<Self>() + self.nodes.len() * std::mem::size_of::<u64>()
}
}
#[track_caller]
fn as_high_31_bits(v: u32) -> u64 {
assert_eq!(
v & (1 << 31),
0,
"High 31-bit values should be smaller than 2^31"
);
(v as u64) << 32
}
const EMPTY_BIT_MASK: u64 = 1 << 63; // Bits `63..64`
index_vec::define_index_type! {
#[derive(DataSize)]
pub struct PathId = u32;
}
const NULL_NODE_IDX: PathId = PathId::from_raw_unchecked(u32::MAX);